
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2009 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_verbosity)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)
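
/* Illustrative note: both distinguished values live in the top two
   bits of the 64-bit SVal (SVal_INVALID == 0xC000000000000000ULL,
   SVal_NOACCESS == 0x8000000000000000ULL), leaving all other bit
   patterns free for client shadow values. */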

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   either above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */
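
/* A minimal usage sketch (illustrative only; my_rcinc/my_rcdec are
   hypothetical client callbacks, not part of this library):

      static void my_rcinc ( SVal sv ) { ... bump refcount on sv ... }
      static void my_rcdec ( SVal sv ) { ... drop refcount on sv ... }

      zsm_init( my_rcinc, my_rcdec );
      zsm_sset_range( base, 4096, sv );  // set shadow for 4096 bytes
      zsm_scopy_range( src, dst, 64 );   // shadow-memory copy
      zsm_flush_cache();                 // make the refcounts exact

   Only after zsm_flush_cache returns are the counts, as observed via
   the callbacks, guaranteed accurate. */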


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
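
/* Illustrative examples: ROUNDUP(13, 8) == 16, ROUNDUP(16, 8) == 16,
   ROUNDDN(13, 8) == 8.  With N == N_LINE_ARANGE these map an address
   to the enclosing cache-line boundaries. */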



/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
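
/* Worked example (illustrative): one 8-byte tree whose shadow values
   are one 32-bit value covering bytes 0-3 and two 16-bit values
   covering bytes 4-5 and 6-7 is described by
      TREE_DESCR_32_0 | TREE_DESCR_16_2 | TREE_DESCR_16_3
   with the payloads held in tree[0], tree[4] and tree[6]; all other
   tree[] slots are then SVal_INVALID. */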

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */
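
/* Illustrative sizing: with N_LINE_ARANGE == 64, a LineZ holds 64
   two-bit indexes in 16 bytes plus a 4-entry dictionary (32 bytes),
   so a line whose 64 shadow values draw on at most 4 distinct SVals
   compresses from 512 bytes to roughly 48.  Lines needing more than
   4 distinct values fall back to the uncompressed LineF below. */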

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
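
/* Illustrative arithmetic: N_SECMAP_BITS == 13 gives
   N_SECMAP_ARANGE == 8192 bytes of address space per SecMap, and with
   N_LINE_ARANGE == 64 that is N_SECMAP_ZLINES == 128 LineZs per
   SecMap. */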

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or (when .dict[0] == SVal_INVALID) it holds in
   .dict[1] the index of the LineF in .linesF that holds the full
   representation.

   Currently-unused LineFs have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
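
/* Illustrative sizing: N_WAY_BITS == 16 gives a direct-mapped cache
   of 65536 lines, each covering 64 bytes of address space, so the
   cache spans 4MB of client address space at any one time.  Note that
   the CacheLine payloads (64 SVals plus descriptors per line) make
   cache_shmem a large object. */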


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8

static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}
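
/* Illustrative decomposition: with N_SECMAP_BITS == 13 and
   N_LINE_BITS == 6, address 0x40ABCD splits as
      shmem__round_to_SecMap_base(0x40ABCD) == 0x40A000  (SecMap base)
      shmem__get_SecMap_offset(0x40ABCD)    == 0xBCD     (offset, 3021)
   and within the SecMap, LineZ index 0xBCD >> 6 == 47 with in-line
   offset 0xBCD & 63 == 13. */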


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
         VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                     (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
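
/* Illustrative example: the 2-bit entries are packed low bits first
   within each byte, so after
      write_twobit_array(arr, 0, 3);
      write_twobit_array(arr, 1, 2);
   arr[0] == (2 << 2) | 3 == 0x0B, and read_twobit_array(arr, 1) == 2. */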

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, the current representation is
   rcdec'd, in recognition of the fact that the contents are just
   about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
      VG_(printf)("SM %p: expand F array from %d to %d\n",
                  sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16  8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8  8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16  8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16  8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8  8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16  8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16  16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8  16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16  16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16  16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8  16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16  16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16 */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16 8 8 */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16 16 */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16  32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8  32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16  32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
                   /* INVALID - any valid descr produces at least one
                      valid bit in tree[0..7] */
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
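
/* Illustrative example: if all eight tree[] entries hold the same
   SVal, the three merge layers collapse the tree bottom-up
   (8s -> 16s -> 32s -> 64), leaving only tree[0] valid and returning
   descr == TREE_DESCR_64.  If instead only tree[0] == tree[1], the
   result is TREE_DESCR_16_0 plus the six remaining 8-bit bits. */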

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal  sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}
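
/* Illustrative lookup: with N_LINE_BITS == 6 and N_WAY_BITS == 16,
   address 0x804A123 gives
      tag == 0x804A100                  (in-line offset bits cleared)
      wix == (0x804A123 >> 6) & 0xFFFF == 0x1284
   so distinct lines 2^22 bytes apart collide in the same way, forcing
   a writeback/fetch cycle via get_cacheline_MISS. */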

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_VTS_H
#define __HB_VTS_H

/* VtsIDs can't exceed 30 bits, since they have to be packed into the
   lowest 30 bits of an SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID   id;
      XArray* ts; /* XArray* ScalarTS(abstract) */
   }
   VTS;


/* Create a new, empty VTS. */
static VTS* VTS__new ( void );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS. */
static VTS* VTS__singleton ( Thr* thr, ULong tym );

/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified. */
static VTS* VTS__tick ( Thr* me, VTS* vts );

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified. */
static VTS* VTS__join ( VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns NULL iff LEQ(A,B), or non-NULL if not.  In the latter case,
   the returned Thr* names a thread at which the LEQ ordering fails.
   There may be more than one such thread, but we only care about
   seeing one of them, not all of them.  This rather strange
   convention is used because sometimes we want to know the actual
   index at which the two VTSs first differ. */
static Thr* VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

#endif /* ! __HB_VTS_H */


/*--------------- to do with Vector Timestamps ---------------*/

/* Scalar Timestamp */
typedef
   struct {
      Thr*  thr;
      ULong tym;
   }
   ScalarTS;

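
/* Illustrative example: a VTS is a sorted sequence of (Thr*, tym)
   pairs, with absent threads implicitly at time 0.  Using symbolic
   thread names:
      a = [ (T1,3), (T2,5) ]
      b = [ (T1,4), (T3,2) ]
      VTS__join(a,b) == [ (T1,4), (T2,5), (T3,2) ]
   and VTS__cmpLEQ(a,b) is non-NULL (returning T2, say), since
   a[T2] == 5 > 0 == b[T2]. */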

static Bool is_sane_VTS ( VTS* vts )
{
   UWord    i, n;
   ScalarTS *st1, *st2;
   if (!vts) return False;
   if (!vts->ts) return False;
   n = VG_(sizeXA)( vts->ts );
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = VG_(indexXA)( vts->ts, i );
         st2 = VG_(indexXA)( vts->ts, i+1 );
         if (st1->thr >= st2->thr)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
VTS* VTS__new ( void )
{
   VTS* vts;
   vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
   tl_assert(vts);
   vts->id = VtsID_INVALID;
   vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
                         HG_(free), sizeof(ScalarTS) );
   tl_assert(vts->ts);
   return vts;
}


/* Delete this VTS in its entirety.
*/
void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->ts);
   VG_(deleteXA)( vts->ts );
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
VTS* VTS__singleton ( Thr* thr, ULong tym ) {
   ScalarTS st;
   VTS*     vts;
   tl_assert(thr);
   tl_assert(tym >= 1);
   vts = VTS__new();
   st.thr = thr;
   st.tym = tym;
   VG_(addToXA)( vts->ts, &st );
   return vts;
}


1654/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1655 not modified.
1656*/
1657VTS* VTS__tick ( Thr* me, VTS* vts )
1658{
1659 ScalarTS* here = NULL;
1660 ScalarTS tmp;
1661 VTS* res;
1662 Word i, n;
1663 tl_assert(me);
1664 tl_assert(is_sane_VTS(vts));
1665 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1666 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1667 res = VTS__new();
1668 n = VG_(sizeXA)( vts->ts );
1669
1670 /* main loop doesn't handle zero-entry case correctly, so
1671 special-case it. */
1672 if (n == 0) {
1673 tmp.thr = me;
1674 tmp.tym = 1;
1675 VG_(addToXA)( res->ts, &tmp );
1676 tl_assert(is_sane_VTS(res));
1677 return res;
1678 }
1679
1680 for (i = 0; i < n; i++) {
1681 here = VG_(indexXA)( vts->ts, i );
1682 if (me < here->thr) {
1683 /* We just went past 'me', without seeing it. */
1684 tmp.thr = me;
1685 tmp.tym = 1;
1686 VG_(addToXA)( res->ts, &tmp );
1687 tmp = *here;
1688 VG_(addToXA)( res->ts, &tmp );
1689 i++;
1690 break;
1691 }
1692 else if (me == here->thr) {
1693 tmp = *here;
1694 tmp.tym++;
1695 VG_(addToXA)( res->ts, &tmp );
1696 i++;
1697 break;
1698 }
1699 else /* me > here->thr */ {
1700 tmp = *here;
1701 VG_(addToXA)( res->ts, &tmp );
1702 }
1703 }
1704 tl_assert(i >= 0 && i <= n);
1705 if (i == n && here && here->thr < me) {
1706 tmp.thr = me;
1707 tmp.tym = 1;
1708 VG_(addToXA)( res->ts, &tmp );
1709 } else {
1710 for (/*keepgoing*/; i < n; i++) {
1711 here = VG_(indexXA)( vts->ts, i );
1712 tmp = *here;
1713 VG_(addToXA)( res->ts, &tmp );
1714 }
1715 }
1716 tl_assert(is_sane_VTS(res));
1717 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1718 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1719 return res;
1720}
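/* Illustrative example (not part of the original file): assuming
   T1's Thr* sorts before T2's, ticking T2 in the VTS [T1:4] yields
   [T1:4, T2:1], since an absent thread is treated as implicitly zero
   and so acquires timestamp 1; ticking T1 instead yields [T1:5].
   Entries remain sorted by Thr* throughout. */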
1721
1722
1723/* Return a new VTS constructed as the join (max) of the 2 args.
1724 Neither arg is modified.
1725*/
1726VTS* VTS__join ( VTS* a, VTS* b )
1727{
1728 Word ia, ib, useda, usedb;
1729 ULong tyma, tymb, tymMax;
1730 Thr* thr;
1731 VTS* res;
sewardjf98e1c02008-10-25 16:22:41 +00001732
1733 tl_assert(a && a->ts);
1734 tl_assert(b && b->ts);
1735 useda = VG_(sizeXA)( a->ts );
1736 usedb = VG_(sizeXA)( b->ts );
1737
1738 res = VTS__new();
1739 ia = ib = 0;
1740
1741 while (1) {
1742
1743 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1744 from a and b in order, where thr is the next Thr*
1745 occurring in either a or b, and tyma/b are the relevant
1746 scalar timestamps, taking into account implicit zeroes. */
1747 tl_assert(ia >= 0 && ia <= useda);
1748 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00001749
njn4c245e52009-03-15 23:25:38 +00001750 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001751 /* both empty - done */
1752 break;
njn4c245e52009-03-15 23:25:38 +00001753
1754 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001755 /* a empty, use up b */
njn4c245e52009-03-15 23:25:38 +00001756 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001757 thr = tmpb->thr;
1758 tyma = 0;
1759 tymb = tmpb->tym;
1760 ib++;
njn4c245e52009-03-15 23:25:38 +00001761
1762 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001763 /* b empty, use up a */
njn4c245e52009-03-15 23:25:38 +00001764 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
sewardjf98e1c02008-10-25 16:22:41 +00001765 thr = tmpa->thr;
1766 tyma = tmpa->tym;
1767 tymb = 0;
1768 ia++;
njn4c245e52009-03-15 23:25:38 +00001769
1770 } else {
sewardjf98e1c02008-10-25 16:22:41 +00001771 /* both not empty; extract lowest-Thr*'d triple */
njn4c245e52009-03-15 23:25:38 +00001772 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
1773 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001774 if (tmpa->thr < tmpb->thr) {
1775 /* a has the lowest unconsidered Thr* */
1776 thr = tmpa->thr;
1777 tyma = tmpa->tym;
1778 tymb = 0;
1779 ia++;
njn4c245e52009-03-15 23:25:38 +00001780 } else if (tmpa->thr > tmpb->thr) {
sewardjf98e1c02008-10-25 16:22:41 +00001781 /* b has the lowest unconsidered Thr* */
1782 thr = tmpb->thr;
1783 tyma = 0;
1784 tymb = tmpb->tym;
1785 ib++;
1786 } else {
1787 /* they both next mention the same Thr* */
1788 tl_assert(tmpa->thr == tmpb->thr);
1789 thr = tmpa->thr; /* == tmpb->thr */
1790 tyma = tmpa->tym;
1791 tymb = tmpb->tym;
1792 ia++;
1793 ib++;
1794 }
1795 }
1796
1797 /* having laboriously determined (thr, tyma, tymb), do something
1798 useful with it. */
1799 tymMax = tyma > tymb ? tyma : tymb;
1800 if (tymMax > 0) {
1801 ScalarTS st;
1802 st.thr = thr;
1803 st.tym = tymMax;
1804 VG_(addToXA)( res->ts, &st );
1805 }
1806
1807 }
1808
1809 tl_assert(is_sane_VTS( res ));
1810
1811 return res;
1812}
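/* Illustrative sketch (not part of the original file): the join is a
   standard merge of two Thr*-sorted vectors taking the pointwise
   maximum, with missing entries treated as zero, e.g.
      join( [T1:4, T2:1], [T2:3, T3:2] ) == [T1:4, T2:3, T3:2].
   A minimal standalone version over plain arrays ('TS' and 'ts_join'
   are hypothetical names):
*/
#if 0
typedef struct { unsigned long thr; unsigned long long tym; } TS;
static int ts_join ( const TS* a, int na, const TS* b, int nb,
                     TS* out /* caller supplies na+nb slots */ )
{
   int ia = 0, ib = 0, no = 0;
   while (ia < na || ib < nb) {
      if (ib == nb || (ia < na && a[ia].thr < b[ib].thr)) {
         out[no++] = a[ia++];                 /* present only in a */
      } else if (ia == na || b[ib].thr < a[ia].thr) {
         out[no++] = b[ib++];                 /* present only in b */
      } else {                                /* in both: take the max */
         out[no] = a[ia];
         if (b[ib].tym > out[no].tym) out[no].tym = b[ib].tym;
         no++; ia++; ib++;
      }
   }
   return no;
}
#endif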
1813
1814
sewardj23f12002009-07-24 08:45:08 +00001815/* Determine if 'a' <= 'b', in the partial ordering. Returns NULL if
 1816 so, or the first Thr* at which the ordering fails. This rather
1817 strange convention is used because sometimes we want to know the
1818 actual index at which they first differ. */
1819static Thr* VTS__cmpLEQ ( VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00001820{
sewardj23f12002009-07-24 08:45:08 +00001821 Word ia, ib, useda, usedb;
1822 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00001823
1824 tl_assert(a && a->ts);
1825 tl_assert(b && b->ts);
1826 useda = VG_(sizeXA)( a->ts );
1827 usedb = VG_(sizeXA)( b->ts );
1828
1829 ia = ib = 0;
1830
1831 while (1) {
1832
njn4c245e52009-03-15 23:25:38 +00001833 /* This logic is to enumerate doubles (tyma, tymb) drawn
 1834 from a and b in order, where tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00001835 scalar timestamps, taking into account implicit zeroes. */
sewardj23f12002009-07-24 08:45:08 +00001836 Thr* thr;
1837
sewardjf98e1c02008-10-25 16:22:41 +00001838 tl_assert(ia >= 0 && ia <= useda);
1839 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00001840
njn4c245e52009-03-15 23:25:38 +00001841 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001842 /* both empty - done */
1843 break;
njn4c245e52009-03-15 23:25:38 +00001844
1845 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001846 /* a empty, use up b */
njn4c245e52009-03-15 23:25:38 +00001847 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001848 tyma = 0;
1849 tymb = tmpb->tym;
sewardj23f12002009-07-24 08:45:08 +00001850 thr = tmpb->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001851 ib++;
njn4c245e52009-03-15 23:25:38 +00001852
1853 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001854 /* b empty, use up a */
njn4c245e52009-03-15 23:25:38 +00001855 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
sewardjf98e1c02008-10-25 16:22:41 +00001856 tyma = tmpa->tym;
sewardj23f12002009-07-24 08:45:08 +00001857 thr = tmpa->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001858 tymb = 0;
1859 ia++;
njn4c245e52009-03-15 23:25:38 +00001860
1861 } else {
sewardjf98e1c02008-10-25 16:22:41 +00001862 /* both not empty; extract lowest-Thr*'d triple */
njn4c245e52009-03-15 23:25:38 +00001863 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
1864 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001865 if (tmpa->thr < tmpb->thr) {
1866 /* a has the lowest unconsidered Thr* */
sewardjf98e1c02008-10-25 16:22:41 +00001867 tyma = tmpa->tym;
sewardj23f12002009-07-24 08:45:08 +00001868 thr = tmpa->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001869 tymb = 0;
1870 ia++;
1871 }
1872 else
1873 if (tmpa->thr > tmpb->thr) {
1874 /* b has the lowest unconsidered Thr* */
sewardjf98e1c02008-10-25 16:22:41 +00001875 tyma = 0;
1876 tymb = tmpb->tym;
sewardj23f12002009-07-24 08:45:08 +00001877 thr = tmpb->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001878 ib++;
1879 } else {
1880 /* they both next mention the same Thr* */
1881 tl_assert(tmpa->thr == tmpb->thr);
sewardjf98e1c02008-10-25 16:22:41 +00001882 tyma = tmpa->tym;
sewardj23f12002009-07-24 08:45:08 +00001883 thr = tmpa->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001884 tymb = tmpb->tym;
1885 ia++;
1886 ib++;
1887 }
1888 }
1889
njn4c245e52009-03-15 23:25:38 +00001890 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00001891 useful with it. */
sewardj23f12002009-07-24 08:45:08 +00001892 if (tyma > tymb) {
1893 /* not LEQ at this index. Quit, since the answer is
1894 determined already. */
1895 tl_assert(thr);
1896 return thr;
1897 }
sewardjf98e1c02008-10-25 16:22:41 +00001898 }
1899
sewardj23f12002009-07-24 08:45:08 +00001900 return NULL; /* all points are LEQ */
sewardjf98e1c02008-10-25 16:22:41 +00001901}
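/* Illustrative example (not part of the original file): with
   A = [T1:2] and B = [T1:3, T2:1], VTS__cmpLEQ(A,B) returns NULL,
   since every component of A is <= the corresponding component of B
   (T2 is implicitly 0 in A); VTS__cmpLEQ(B,A) returns T1, the first
   thread at which B's timestamp exceeds A's. */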
1902
1903
1904/* Compute an arbitrary structural (total) ordering on the two args,
1905 based on their VCs, so they can be looked up in a table, tree, etc.
1906 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1907*/
1908Word VTS__cmp_structural ( VTS* a, VTS* b )
1909{
1910 /* We just need to generate an arbitrary total ordering based on
 1911 a->ts and b->ts. Preferably do it in a way which detects likely
1912 differences relatively quickly. */
1913 Word i, useda, usedb;
1914 ScalarTS *tmpa, *tmpb;
1915
1916 tl_assert(a && a->ts);
1917 tl_assert(b && b->ts);
1918 useda = VG_(sizeXA)( a->ts );
1919 usedb = VG_(sizeXA)( b->ts );
1920
1921 if (useda < usedb) return -1;
1922 if (useda > usedb) return 1;
1923
1924 /* Same length vectors, so let's step through them together. */
1925 tl_assert(useda == usedb);
1926 for (i = 0; i < useda; i++) {
1927 tmpa = VG_(indexXA)( a->ts, i );
1928 tmpb = VG_(indexXA)( b->ts, i );
1929 if (tmpa->tym < tmpb->tym) return -1;
1930 if (tmpa->tym > tmpb->tym) return 1;
1931 if (tmpa->thr < tmpb->thr) return -1;
1932 if (tmpa->thr > tmpb->thr) return 1;
1933 }
1934
1935 /* They're identical. */
1936 return 0;
1937}
1938
1939
1940/* Debugging only. Display the given VTS in the buffer.
1941*/
1942void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1943 ScalarTS* st;
1944 HChar unit[64];
1945 Word i, n;
1946 Int avail = nBuf;
1947 tl_assert(vts && vts->ts);
1948 tl_assert(nBuf > 16);
1949 buf[0] = '[';
1950 buf[1] = 0;
1951 n = VG_(sizeXA)( vts->ts );
1952 for (i = 0; i < n; i++) {
1953 tl_assert(avail >= 40);
1954 st = VG_(indexXA)( vts->ts, i );
1955 VG_(memset)(unit, 0, sizeof(unit));
1956 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1957 st->thr, st->tym);
1958 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1959 VG_(strcat)(buf, " ...]");
1960 buf[nBuf-1] = 0;
1961 return;
1962 }
1963 VG_(strcat)(buf, unit);
1964 avail -= VG_(strlen)(unit);
1965 }
1966 VG_(strcat)(buf, "]");
1967 buf[nBuf-1] = 0;
1968}
1969
1970
1971/* Debugging only. Return vts[index], so to speak.
1972*/
1973ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1974 UWord i, n;
1975 tl_assert(vts && vts->ts);
1976 n = VG_(sizeXA)( vts->ts );
1977 for (i = 0; i < n; i++) {
1978 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1979 if (st->thr == idx)
1980 return st->tym;
1981 }
1982 return 0;
1983}
1984
1985
1986/////////////////////////////////////////////////////////////////
1987/////////////////////////////////////////////////////////////////
1988// //
1989// SECTION END vts primitives //
1990// //
1991/////////////////////////////////////////////////////////////////
1992/////////////////////////////////////////////////////////////////
1993
1994
1995
1996/////////////////////////////////////////////////////////////////
1997/////////////////////////////////////////////////////////////////
1998// //
1999// SECTION BEGIN main library //
2000// //
2001/////////////////////////////////////////////////////////////////
2002/////////////////////////////////////////////////////////////////
2003
2004
2005/////////////////////////////////////////////////////////
2006// //
2007// VTS set //
2008// //
2009/////////////////////////////////////////////////////////
2010
2011static WordFM* /* VTS* void void */ vts_set = NULL;
2012
2013static void vts_set_init ( void )
2014{
2015 tl_assert(!vts_set);
2016 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2017 HG_(free),
2018 (Word(*)(UWord,UWord))VTS__cmp_structural );
2019 tl_assert(vts_set);
2020}
2021
2022/* Given a newly made VTS, look in vts_set to see if we already have
2023 an identical one. If yes, free up this one and return instead a
2024 pointer to the existing one. If no, add this one to the set and
2025 return the same pointer. Caller differentiates the two cases by
2026 comparing returned pointer with the supplied one (although that
2027 does require that the supplied VTS is not already in the set).
2028*/
2029static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2030{
2031 UWord keyW, valW;
2032 /* lookup cand (by value) */
2033 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2034 /* found it */
2035 tl_assert(valW == 0);
2036 /* if this fails, cand (by ref) was already present (!) */
2037 tl_assert(keyW != (UWord)cand);
2038 VTS__delete(cand);
2039 return (VTS*)keyW;
2040 } else {
2041 /* not present. Add and return pointer to same. */
2042 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2043 return cand;
2044 }
2045}
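/* Illustrative usage (not part of the original file): this is the
   classic hash-consing idiom. Every VTS the library retains is
   funnelled through here, so pointer equality afterwards implies
   structural equality. A hypothetical caller:
*/
#if 0
VTS* j = VTS__join( a, b );
j = vts_set__find_and_dealloc__or_add( j );
/* 'j' now points at the canonical copy; the freshly built join has
   already been freed if an identical VTS was present in vts_set. */
#endif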
2046
2047
2048/////////////////////////////////////////////////////////
2049// //
2050// VTS table //
2051// //
2052/////////////////////////////////////////////////////////
2053
2054static void VtsID__invalidate_caches ( void ); /* fwds */
2055
2056/* A type to hold VTS table entries. Invariants:
2057 If .vts == NULL, then this entry is not in use, so:
2058 - .rc == 0
2059 - this entry is on the freelist (unfortunately, does not imply
 2060 any constraints on value for .freelink)
2061 If .vts != NULL, then this entry is in use:
2062 - .vts is findable in vts_set
2063 - .vts->id == this entry number
2064 - no specific value for .rc (even 0 is OK)
 2065 - this entry is not on freelist, so .freelink == VtsID_INVALID
2066*/
2067typedef
2068 struct {
2069 VTS* vts; /* vts, in vts_set */
2070 UWord rc; /* reference count - enough for entire aspace */
2071 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2072 }
2073 VtsTE;
2074
2075/* The VTS table. */
2076static XArray* /* of VtsTE */ vts_tab = NULL;
2077
2078/* An index into the VTS table, indicating the start of the list of
2079 free (available for use) entries. If the list is empty, this is
2080 VtsID_INVALID. */
2081static VtsID vts_tab_freelist = VtsID_INVALID;
2082
2083/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2084 vts_tab equals or exceeds this size. After GC, the value here is
2085 set appropriately so as to check for the next GC point. */
2086static Word vts_next_GC_at = 1000;
2087
2088static void vts_tab_init ( void )
2089{
2090 vts_tab
2091 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2092 HG_(free), sizeof(VtsTE) );
2093 vts_tab_freelist
2094 = VtsID_INVALID;
2095 tl_assert(vts_tab);
2096}
2097
2098/* Add ii to the free list, checking that it looks out-of-use. */
2099static void add_to_free_list ( VtsID ii )
2100{
2101 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2102 tl_assert(ie->vts == NULL);
2103 tl_assert(ie->rc == 0);
2104 tl_assert(ie->freelink == VtsID_INVALID);
2105 ie->freelink = vts_tab_freelist;
2106 vts_tab_freelist = ii;
2107}
2108
2109/* Get an entry from the free list. This will return VtsID_INVALID if
2110 the free list is empty. */
2111static VtsID get_from_free_list ( void )
2112{
2113 VtsID ii;
2114 VtsTE* ie;
2115 if (vts_tab_freelist == VtsID_INVALID)
2116 return VtsID_INVALID;
2117 ii = vts_tab_freelist;
2118 ie = VG_(indexXA)( vts_tab, ii );
2119 tl_assert(ie->vts == NULL);
2120 tl_assert(ie->rc == 0);
2121 vts_tab_freelist = ie->freelink;
2122 return ii;
2123}
2124
2125/* Produce a new VtsID that can be used, either by getting it from
2126 the freelist, or, if that is empty, by expanding vts_tab. */
2127static VtsID get_new_VtsID ( void )
2128{
2129 VtsID ii;
2130 VtsTE te;
2131 ii = get_from_free_list();
2132 if (ii != VtsID_INVALID)
2133 return ii;
2134 te.vts = NULL;
2135 te.rc = 0;
2136 te.freelink = VtsID_INVALID;
2137 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2138 return ii;
2139}
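/* Illustrative sketch (not part of the original file): the three
   routines above implement an index-chained freelist over an XArray.
   The same idea over a fixed-size table, with -1 standing in for
   VtsID_INVALID ('Ent' and friends are hypothetical names):
*/
#if 0
typedef struct { Bool inUse; Int freelink; } Ent;
static Ent ents[100];
static Int ents_used    = 0;
static Int ent_freehead = -1;
static Int ent_alloc ( void )
{
   Int i;
   if (ent_freehead != -1) {
      i = ent_freehead;                 /* pop the freelist head */
      ent_freehead = ents[i].freelink;
   } else {
      i = ents_used++;                  /* extend the table */
   }
   ents[i].inUse = True;
   ents[i].freelink = -1;
   return i;
}
static void ent_free ( Int i )
{
   ents[i].inUse = False;
   ents[i].freelink = ent_freehead;     /* push onto the freelist */
   ent_freehead = i;
}
#endif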
2140
2141
2142/* Indirect callback from lib_zsm. */
2143static void VtsID__rcinc ( VtsID ii )
2144{
2145 VtsTE* ie;
2146 /* VG_(indexXA) does a range check for us */
2147 ie = VG_(indexXA)( vts_tab, ii );
2148 tl_assert(ie->vts); /* else it's not in use */
2149 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2150 tl_assert(ie->vts->id == ii);
2151 ie->rc++;
2152}
2153
2154/* Indirect callback from lib_zsm. */
2155static void VtsID__rcdec ( VtsID ii )
2156{
2157 VtsTE* ie;
2158 /* VG_(indexXA) does a range check for us */
2159 ie = VG_(indexXA)( vts_tab, ii );
2160 tl_assert(ie->vts); /* else it's not in use */
2161 tl_assert(ie->rc > 0); /* else RC snafu */
2162 tl_assert(ie->vts->id == ii);
2163 ie->rc--;
2164}
2165
2166
2167/* Look up 'cand' in our collection of VTSs. If present, deallocate
2168 it and return the VtsID for the pre-existing version. If not
2169 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2170 for it, and return that. */
2171static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2172{
2173 VTS* auld;
2174 tl_assert(cand->id == VtsID_INVALID);
2175 auld = vts_set__find_and_dealloc__or_add(cand);
2176 if (auld != cand) {
2177 /* We already have an Aulde one. Use that. */
2178 VtsTE* ie;
2179 tl_assert(auld->id != VtsID_INVALID);
2180 ie = VG_(indexXA)( vts_tab, auld->id );
2181 tl_assert(ie->vts == auld);
2182 return auld->id;
2183 } else {
2184 VtsID ii = get_new_VtsID();
2185 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2186 ie->vts = cand;
2187 ie->rc = 0;
2188 ie->freelink = VtsID_INVALID;
2189 cand->id = ii;
2190 return ii;
2191 }
2192}
2193
2194
2195static void show_vts_stats ( HChar* caller )
2196{
2197 UWord nSet, nTab, nLive;
2198 ULong totrc;
2199 UWord n, i;
2200 nSet = VG_(sizeFM)( vts_set );
2201 nTab = VG_(sizeXA)( vts_tab );
2202 totrc = 0;
2203 nLive = 0;
2204 n = VG_(sizeXA)( vts_tab );
2205 for (i = 0; i < n; i++) {
2206 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2207 if (ie->vts) {
2208 nLive++;
2209 totrc += (ULong)ie->rc;
2210 } else {
2211 tl_assert(ie->rc == 0);
2212 }
2213 }
2214 VG_(printf)(" show_vts_stats %s\n", caller);
2215 VG_(printf)(" vts_tab size %4lu\n", nTab);
2216 VG_(printf)(" vts_tab live %4lu\n", nLive);
2217 VG_(printf)(" vts_set size %4lu\n", nSet);
2218 VG_(printf)(" total rc %4llu\n", totrc);
2219}
2220
2221/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002222__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002223static void vts_tab__do_GC ( Bool show_stats )
2224{
2225 UWord i, nTab, nLive, nFreed;
2226
2227 /* check this is actually necessary. */
2228 tl_assert(vts_tab_freelist == VtsID_INVALID);
2229
2230 /* empty the caches for partial order checks and binary joins. We
2231 could do better and prune out the entries to be deleted, but it
2232 ain't worth the hassle. */
2233 VtsID__invalidate_caches();
2234
2235 /* First, make the reference counts up to date. */
2236 zsm_flush_cache();
2237
2238 nTab = VG_(sizeXA)( vts_tab );
2239
2240 if (show_stats) {
2241 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2242 show_vts_stats("before GC");
2243 }
2244
2245 /* Now we can inspect the entire vts_tab. Any entries
2246 with zero .rc fields are now no longer in use and can be
 2247 put on the free list, removed from vts_set, and deleted. */
2248 nFreed = 0;
2249 for (i = 0; i < nTab; i++) {
2250 Bool present;
2251 UWord oldK = 0, oldV = 0;
2252 VtsTE* te = VG_(indexXA)( vts_tab, i );
2253 if (te->vts == NULL) {
2254 tl_assert(te->rc == 0);
2255 continue; /* already on the free list (presumably) */
2256 }
2257 if (te->rc > 0)
2258 continue; /* in use */
2259 /* Ok, we got one we can free. */
2260 tl_assert(te->vts->id == i);
2261 /* first, remove it from vts_set. */
2262 present = VG_(delFromFM)( vts_set,
2263 &oldK, &oldV, (UWord)te->vts );
2264 tl_assert(present); /* else it isn't in vts_set ?! */
2265 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2266 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2267 /* now free the VTS itself */
2268 VTS__delete(te->vts);
2269 te->vts = NULL;
2270 /* and finally put this entry on the free list */
2271 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2272 add_to_free_list( i );
2273 nFreed++;
2274 }
2275
2276 /* Now figure out when the next GC should be. We'll allow the
2277 number of VTSs to double before GCing again. Except of course
2278 that since we can't (or, at least, don't) shrink vts_tab, we
 2279 can't set the threshold value smaller than it. */
2280 tl_assert(nFreed <= nTab);
2281 nLive = nTab - nFreed;
2282 tl_assert(nLive >= 0 && nLive <= nTab);
2283 vts_next_GC_at = 2 * nLive;
2284 if (vts_next_GC_at < nTab)
2285 vts_next_GC_at = nTab;
2286
2287 if (show_stats) {
2288 show_vts_stats("after GC");
2289 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2290 }
2291
sewardjd024ae52008-11-09 20:47:57 +00002292 if (VG_(clo_verbosity) > 1) {
sewardjf98e1c02008-10-25 16:22:41 +00002293 static UInt ctr = 0;
2294 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002295 VG_(message)(Vg_DebugMsg,
sewardj24118492009-07-15 14:50:02 +00002296 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
sewardj8aa41de2009-01-22 12:24:26 +00002297 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00002298 }
2299}
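/* Illustrative example (not part of the original file): if vts_tab
   holds 4000 entries of which 1500 survive a GC, the next GC point is
   max(2*1500, 4000) = 4000, since the table cannot shrink below its
   current size; had 2500 survived, it would be 2*2500 = 5000. */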
2300
2301
2302/////////////////////////////////////////////////////////
2303// //
2304// Vts IDs //
2305// //
2306/////////////////////////////////////////////////////////
2307
2308//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00002309static ULong stats__cmpLEQ_queries = 0;
2310static ULong stats__cmpLEQ_misses = 0;
2311static ULong stats__join2_queries = 0;
2312static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002313
2314static inline UInt ROL32 ( UInt w, Int n ) {
2315 w = (w << n) | (w >> (32-n));
2316 return w;
2317}
2318static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2319 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2320 return hash % nTab;
2321}
2322
sewardj23f12002009-07-24 08:45:08 +00002323#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00002324static
sewardj23f12002009-07-24 08:45:08 +00002325 struct { VtsID vi1; VtsID vi2; Bool leq; }
2326 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00002327
2328#define N_JOIN2_CACHE 1023
2329static
2330 struct { VtsID vi1; VtsID vi2; VtsID res; }
2331 join2_cache[N_JOIN2_CACHE];
2332
2333static void VtsID__invalidate_caches ( void ) {
2334 Int i;
sewardj23f12002009-07-24 08:45:08 +00002335 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
2336 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
2337 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
2338 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00002339 }
2340 for (i = 0; i < N_JOIN2_CACHE; i++) {
2341 join2_cache[i].vi1 = VtsID_INVALID;
2342 join2_cache[i].vi2 = VtsID_INVALID;
2343 join2_cache[i].res = VtsID_INVALID;
2344 }
2345}
2346//////////////////////////
2347
sewardjd52392d2008-11-08 20:36:26 +00002348//static Bool VtsID__is_valid ( VtsID vi ) {
2349// VtsTE* ve;
2350// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2351// return False;
2352// ve = VG_(indexXA)( vts_tab, vi );
2353// if (!ve->vts)
2354// return False;
2355// tl_assert(ve->vts->id == vi);
2356// return True;
2357//}
sewardjf98e1c02008-10-25 16:22:41 +00002358
2359static VTS* VtsID__to_VTS ( VtsID vi ) {
2360 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2361 tl_assert(te->vts);
2362 return te->vts;
2363}
2364
2365static void VtsID__pp ( VtsID vi ) {
2366 HChar buf[100];
2367 VTS* vts = VtsID__to_VTS(vi);
2368 VTS__show( buf, sizeof(buf)-1, vts );
2369 buf[sizeof(buf)-1] = 0;
2370 VG_(printf)("%s", buf);
2371}
2372
2373/* compute partial ordering relation of vi1 and vi2. */
2374__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00002375static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00002376 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00002377 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00002378 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00002379 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00002380 tl_assert(vi1 != vi2);
2381 ////++
sewardj23f12002009-07-24 08:45:08 +00002382 stats__cmpLEQ_queries++;
2383 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
2384 if (cmpLEQ_cache[hash].vi1 == vi1
2385 && cmpLEQ_cache[hash].vi2 == vi2)
2386 return cmpLEQ_cache[hash].leq;
2387 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00002388 ////--
2389 v1 = VtsID__to_VTS(vi1);
2390 v2 = VtsID__to_VTS(vi2);
sewardj23f12002009-07-24 08:45:08 +00002391 leq = VTS__cmpLEQ( v1, v2 ) == NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002392 ////++
sewardj23f12002009-07-24 08:45:08 +00002393 cmpLEQ_cache[hash].vi1 = vi1;
2394 cmpLEQ_cache[hash].vi2 = vi2;
2395 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00002396 ////--
sewardj23f12002009-07-24 08:45:08 +00002397 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00002398}
sewardj23f12002009-07-24 08:45:08 +00002399static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
2400 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00002401}
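/* Illustrative sketch (not part of the original file): cmpLEQ and
   join2 share the same pattern -- a direct-mapped memo table with one
   slot per hash value, overwritten on a miss. Stripped of the VTS
   specifics ('slow_op' is a stand-in for the real computation):
*/
#if 0
typedef struct { UInt k1; UInt k2; Int res; } MemoSlot;
static MemoSlot memo[1023];
static Int memoised_op ( UInt k1, UInt k2 )
{
   UInt h = (k1 * 19 + k2 * 13) % 1023;   /* any cheap mixer */
   if (memo[h].k1 == k1 && memo[h].k2 == k2)
      return memo[h].res;                 /* cache hit */
   memo[h].res = slow_op( k1, k2 );       /* miss: recompute */
   memo[h].k1  = k1;
   memo[h].k2  = k2;
   return memo[h].res;
}
#endif
/* Note the real caches initialise both keys to VtsID_INVALID, so an
   empty slot can never produce a false hit. */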
2402
2403/* compute binary join */
2404__attribute__((noinline))
2405static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2406 UInt hash;
2407 VtsID res;
2408 VTS *vts1, *vts2, *nyu;
2409 //if (vi1 == vi2) return vi1;
2410 tl_assert(vi1 != vi2);
2411 ////++
2412 stats__join2_queries++;
2413 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2414 if (join2_cache[hash].vi1 == vi1
2415 && join2_cache[hash].vi2 == vi2)
2416 return join2_cache[hash].res;
2417 stats__join2_misses++;
2418 ////--
2419 vts1 = VtsID__to_VTS(vi1);
2420 vts2 = VtsID__to_VTS(vi2);
2421 nyu = VTS__join(vts1,vts2);
2422 res = vts_tab__find_and_dealloc__or_add(nyu);
2423 ////++
2424 join2_cache[hash].vi1 = vi1;
2425 join2_cache[hash].vi2 = vi2;
2426 join2_cache[hash].res = res;
2427 ////--
2428 return res;
2429}
2430static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00002431 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00002432}
2433
2434/* create a singleton VTS, namely [thr:1] */
2435static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2436 VTS* nyu = VTS__singleton(thr,tym);
2437 return vts_tab__find_and_dealloc__or_add(nyu);
2438}
2439
2440/* tick operation, creates value 1 if specified index is absent */
2441static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2442 VTS* vts = VtsID__to_VTS(vi);
2443 VTS* nyu = VTS__tick(idx,vts);
2444 return vts_tab__find_and_dealloc__or_add(nyu);
2445}
2446
2447/* index into a VTS (only for assertions) */
2448static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2449 VTS* vts = VtsID__to_VTS(vi);
2450 return VTS__indexAt_SLOW( vts, idx );
2451}
2452
sewardj23f12002009-07-24 08:45:08 +00002453/* Assuming that !cmpLEQ(vi1, vi2), find the first (or any, really)
 2454 Thr* in vi1 whose scalar timestamp is pointwise greater than the
 2455 corresponding one in vi2; the assumption guarantees that such a
 2456 Thr* exists. This needs to be fairly quick since it is called
 2457 every time a race is detected. */
2458static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
2459{
2460 VTS *vts1, *vts2;
2461 Thr* diffthr;
2462 tl_assert(vi1 != vi2);
2463 vts1 = VtsID__to_VTS(vi1);
2464 vts2 = VtsID__to_VTS(vi2);
2465 tl_assert(vts1 != vts2);
2466 diffthr = VTS__cmpLEQ(vts1, vts2);
2467 tl_assert(diffthr); /* else they are LEQ ! */
2468 return diffthr;
2469}
2470
2471
2472/////////////////////////////////////////////////////////
2473// //
2474// Filters //
2475// //
2476/////////////////////////////////////////////////////////
2477
2478// baseline: 5, 9
2479#define FI_LINE_SZB_LOG2 5
2480#define FI_NUM_LINES_LOG2 10
2481
2482#define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
2483#define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
2484
2485#define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
2486#define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
2487
2488#define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
2489 & (Addr)(FI_NUM_LINES-1) )
2490
2491
2492/* In the lines, each 8 bytes are treated individually, and are mapped
2493 to a UShort. Regardless of endianness of the underlying machine,
2494 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
2495 the highest address.
2496
 2497 Of each bit pair, the higher numbered bit is set if an R has been
 2498 seen, the lower if a W has been seen, so the actual layout is:
2499
2500 15 14 ... 01 00
2501
2502 R W for addr+7 ... R W for addr+0
2503
2504 So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
2505*/
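/* Illustrative example (not part of the original file): a 1-byte
   write to addr+3 within an 8-byte chunk uses the bit pair at
   positions 7:6, so its mask is 0x3 << (2*3) = 0x00C0; a 1-byte read
   of the same address tests/sets only the R bit, mask
   0x2 << (2*3) = 0x0080. */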
2506
2507/* tags are separated from lines. tags are Addrs and are
2508 the base address of the line. */
2509typedef
2510 struct {
2511 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
2512 }
2513 FiLine;
2514
2515typedef
2516 struct {
2517 Addr tags[FI_NUM_LINES];
2518 FiLine lines[FI_NUM_LINES];
2519 }
2520 Filter;
2521
2522/* Forget everything we know -- clear the filter and let everything
2523 through. This needs to be as fast as possible, since it is called
2524 every time the running thread changes, and every time a thread's
2525 vector clocks change, which can be quite frequent. The obvious
2526 fast way to do this is simply to stuff in tags which we know are
2527 not going to match anything, since they're not aligned to the start
2528 of a line. */
2529static void Filter__clear ( Filter* fi, HChar* who )
2530{
2531 UWord i;
2532 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
2533 for (i = 0; i < FI_NUM_LINES; i += 8) {
2534 fi->tags[i+0] = 1; /* impossible value -- cannot match */
2535 fi->tags[i+1] = 1;
2536 fi->tags[i+2] = 1;
2537 fi->tags[i+3] = 1;
2538 fi->tags[i+4] = 1;
2539 fi->tags[i+5] = 1;
2540 fi->tags[i+6] = 1;
2541 fi->tags[i+7] = 1;
2542 }
2543 tl_assert(i == FI_NUM_LINES);
2544}
2545
2546/* Clearing an arbitrary range in the filter. Unfortunately
2547 we have to do this due to core-supplied new/die-mem events. */
2548
2549static void Filter__clear_1byte ( Filter* fi, Addr a )
2550{
2551 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2552 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2553 FiLine* line = &fi->lines[lineno];
2554 UWord loff = (a - atag) / 8;
2555 UShort mask = 0x3 << (2 * (a & 7));
2556 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
2557 if (LIKELY( fi->tags[lineno] == atag )) {
2558 /* hit. clear the bits. */
2559 UShort u16 = line->u16s[loff];
2560 line->u16s[loff] = u16 & ~mask; /* clear them */
2561 } else {
2562 /* miss. The filter doesn't hold this address, so ignore. */
2563 }
2564}
2565
2566static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
2567{
2568 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2569 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2570 FiLine* line = &fi->lines[lineno];
2571 UWord loff = (a - atag) / 8;
2572 if (LIKELY( fi->tags[lineno] == atag )) {
2573 line->u16s[loff] = 0;
2574 } else {
2575 /* miss. The filter doesn't hold this address, so ignore. */
2576 }
2577}
2578
2579static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
2580{
2581 //VG_(printf)("%lu ", len);
2582 /* slowly do part preceding 8-alignment */
2583 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
2584 Filter__clear_1byte( fi, a );
2585 a++;
2586 len--;
2587 }
2588 /* vector loop */
2589 while (len >= 8) {
2590 Filter__clear_8bytes_aligned( fi, a );
2591 a += 8;
2592 len -= 8;
2593 }
2594 /* slowly do tail */
2595 while (UNLIKELY(len > 0)) {
2596 Filter__clear_1byte( fi, a );
2597 a++;
2598 len--;
2599 }
2600}
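/* Illustrative example (not part of the original file): for
   a = 0x1003 and len = 21, the routine does 5 single-byte clears
   (bringing 'a' up to the 8-aligned address 0x1008), then two aligned
   8-byte clears covering 0x1008..0x1017, leaving len == 0 and hence
   no tail bytes. */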
2601
2602
2603/* ------ Read handlers for the filter. ------ */
2604
2605static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
2606{
2607 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
2608 return False;
2609 {
2610 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2611 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2612 FiLine* line = &fi->lines[lineno];
2613 UWord loff = (a - atag) / 8;
2614 UShort mask = 0xAAAA;
2615 if (LIKELY( fi->tags[lineno] == atag )) {
2616 /* hit. check line and update. */
2617 UShort u16 = line->u16s[loff];
2618 Bool ok = (u16 & mask) == mask; /* all R bits set? */
2619 line->u16s[loff] = u16 | mask; /* set them */
2620 return ok;
2621 } else {
2622 /* miss. nuke existing line and re-use it. */
2623 UWord i;
2624 fi->tags[lineno] = atag;
2625 for (i = 0; i < FI_LINE_SZB / 8; i++)
2626 line->u16s[i] = 0;
2627 line->u16s[loff] = mask;
2628 return False;
2629 }
2630 }
2631}
2632
2633static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
2634{
2635 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
2636 return False;
2637 {
2638 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2639 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2640 FiLine* line = &fi->lines[lineno];
2641 UWord loff = (a - atag) / 8;
2642 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
2643 if (LIKELY( fi->tags[lineno] == atag )) {
2644 /* hit. check line and update. */
2645 UShort u16 = line->u16s[loff];
2646 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
2647 line->u16s[loff] = u16 | mask; /* set them */
2648 return ok;
2649 } else {
2650 /* miss. nuke existing line and re-use it. */
2651 UWord i;
2652 fi->tags[lineno] = atag;
2653 for (i = 0; i < FI_LINE_SZB / 8; i++)
2654 line->u16s[i] = 0;
2655 line->u16s[loff] = mask;
2656 return False;
2657 }
2658 }
2659}
2660
2661static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
2662{
2663 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
2664 return False;
2665 {
2666 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2667 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2668 FiLine* line = &fi->lines[lineno];
2669 UWord loff = (a - atag) / 8;
2670 UShort mask = 0xA << (2 * (a & 6));
2671 /* mask is A000, 0A00, 00A0 or 000A */
2672 if (LIKELY( fi->tags[lineno] == atag )) {
2673 /* hit. check line and update. */
2674 UShort u16 = line->u16s[loff];
2675 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
2676 line->u16s[loff] = u16 | mask; /* set them */
2677 return ok;
2678 } else {
2679 /* miss. nuke existing line and re-use it. */
2680 UWord i;
2681 fi->tags[lineno] = atag;
2682 for (i = 0; i < FI_LINE_SZB / 8; i++)
2683 line->u16s[i] = 0;
2684 line->u16s[loff] = mask;
2685 return False;
2686 }
2687 }
2688}
2689
2690static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
2691{
2692 {
2693 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2694 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2695 FiLine* line = &fi->lines[lineno];
2696 UWord loff = (a - atag) / 8;
2697 UShort mask = 0x2 << (2 * (a & 7));
2698 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
2699 if (LIKELY( fi->tags[lineno] == atag )) {
2700 /* hit. check line and update. */
2701 UShort u16 = line->u16s[loff];
2702 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
2703 line->u16s[loff] = u16 | mask; /* set them */
2704 return ok;
2705 } else {
2706 /* miss. nuke existing line and re-use it. */
2707 UWord i;
2708 fi->tags[lineno] = atag;
2709 for (i = 0; i < FI_LINE_SZB / 8; i++)
2710 line->u16s[i] = 0;
2711 line->u16s[loff] = mask;
2712 return False;
2713 }
2714 }
2715}
2716
2717
2718/* ------ Write handlers for the filter. ------ */
2719
2720static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
2721{
2722 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
2723 return False;
2724 {
2725 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2726 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2727 FiLine* line = &fi->lines[lineno];
2728 UWord loff = (a - atag) / 8;
2729 UShort mask = 0xFFFF;
2730 if (LIKELY( fi->tags[lineno] == atag )) {
2731 /* hit. check line and update. */
2732 UShort u16 = line->u16s[loff];
2733 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
2734 line->u16s[loff] = u16 | mask; /* set them */
2735 return ok;
2736 } else {
2737 /* miss. nuke existing line and re-use it. */
2738 UWord i;
2739 fi->tags[lineno] = atag;
2740 for (i = 0; i < FI_LINE_SZB / 8; i++)
2741 line->u16s[i] = 0;
2742 line->u16s[loff] = mask;
2743 return False;
2744 }
2745 }
2746}
2747
2748static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
2749{
2750 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
2751 return False;
2752 {
2753 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2754 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2755 FiLine* line = &fi->lines[lineno];
2756 UWord loff = (a - atag) / 8;
2757 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
2758 if (LIKELY( fi->tags[lineno] == atag )) {
2759 /* hit. check line and update. */
2760 UShort u16 = line->u16s[loff];
2761 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
2762 line->u16s[loff] = u16 | mask; /* set them */
2763 return ok;
2764 } else {
2765 /* miss. nuke existing line and re-use it. */
2766 UWord i;
2767 fi->tags[lineno] = atag;
2768 for (i = 0; i < FI_LINE_SZB / 8; i++)
2769 line->u16s[i] = 0;
2770 line->u16s[loff] = mask;
2771 return False;
2772 }
2773 }
2774}
2775
2776static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
2777{
2778 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
2779 return False;
2780 {
2781 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2782 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2783 FiLine* line = &fi->lines[lineno];
2784 UWord loff = (a - atag) / 8;
2785 UShort mask = 0xF << (2 * (a & 6));
2786 /* mask is F000, 0F00, 00F0 or 000F */
2787 if (LIKELY( fi->tags[lineno] == atag )) {
2788 /* hit. check line and update. */
2789 UShort u16 = line->u16s[loff];
2790 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
2791 line->u16s[loff] = u16 | mask; /* set them */
2792 return ok;
2793 } else {
2794 /* miss. nuke existing line and re-use it. */
2795 UWord i;
2796 fi->tags[lineno] = atag;
2797 for (i = 0; i < FI_LINE_SZB / 8; i++)
2798 line->u16s[i] = 0;
2799 line->u16s[loff] = mask;
2800 return False;
2801 }
2802 }
2803}
2804
2805static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
2806{
2807 {
2808 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2809 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2810 FiLine* line = &fi->lines[lineno];
2811 UWord loff = (a - atag) / 8;
2812 UShort mask = 0x3 << (2 * (a & 7));
2813 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
2814 if (LIKELY( fi->tags[lineno] == atag )) {
2815 /* hit. check line and update. */
2816 UShort u16 = line->u16s[loff];
 2817 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
2818 line->u16s[loff] = u16 | mask; /* set them */
2819 return ok;
2820 } else {
2821 /* miss. nuke existing line and re-use it. */
2822 UWord i;
2823 fi->tags[lineno] = atag;
2824 for (i = 0; i < FI_LINE_SZB / 8; i++)
2825 line->u16s[i] = 0;
2826 line->u16s[loff] = mask;
2827 return False;
2828 }
2829 }
2830}
2831
sewardjf98e1c02008-10-25 16:22:41 +00002832
2833/////////////////////////////////////////////////////////
2834// //
2835// Threads //
2836// //
2837/////////////////////////////////////////////////////////
2838
sewardj23f12002009-07-24 08:45:08 +00002839// QQQ move this somewhere else
2840typedef struct { ULong ull; ExeContext* ec; } ULong_n_EC;
2841
sewardjf98e1c02008-10-25 16:22:41 +00002842struct _Thr {
2843 /* Current VTSs for this thread. They change as we go along. viR
2844 is the VTS to be used for reads, viW for writes. Usually they
2845 are the same, but can differ when we deal with reader-writer
sewardj23f12002009-07-24 08:45:08 +00002846 locks. It is always the case that
2847 VtsID__cmpLEQ(viW,viR) == True
2848 that is, viW must be the same, or lagging behind, viR. */
sewardjf98e1c02008-10-25 16:22:41 +00002849 VtsID viR;
2850 VtsID viW;
sewardj23f12002009-07-24 08:45:08 +00002851
 2852 /* Is initially False, and is set to True after the thread really
2853 has done a low-level exit. */
2854 Bool still_alive;
2855
2856 /* A filter that removes references for which we believe that
2857 msmcread/msmcwrite will not change the state, nor report a
2858 race. */
2859 Filter* filter;
2860
sewardjf98e1c02008-10-25 16:22:41 +00002861 /* opaque (to us) data we hold on behalf of the library's user. */
2862 void* opaque;
sewardj23f12002009-07-24 08:45:08 +00002863
2864 /* The ULongs (scalar Krs) in this accumulate in strictly
2865 increasing order, without duplicates. This is important because
2866 we need to be able to find a given scalar Kr in this array
2867 later, by binary search. */
2868 XArray* /* ULong_n_EC */ local_Krs_n_stacks;
sewardjf98e1c02008-10-25 16:22:41 +00002869};
2870
2871static Thr* Thr__new ( void ) {
2872 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2873 thr->viR = VtsID_INVALID;
2874 thr->viW = VtsID_INVALID;
sewardj23f12002009-07-24 08:45:08 +00002875 thr->still_alive = True;
2876 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
2877 thr->local_Krs_n_stacks
2878 = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.3 (local_Krs_and_stacks)",
2879 HG_(free), sizeof(ULong_n_EC) );
sewardjf98e1c02008-10-25 16:22:41 +00002880 return thr;
2881}
2882
sewardj23f12002009-07-24 08:45:08 +00002883static void note_local_Kr_n_stack_for ( Thr* thr )
2884{
2885 Word nPresent;
2886 ULong_n_EC pair;
2887 tl_assert(thr);
2888 /* This is the scalar Kr for thr. */
2889 pair.ull = VtsID__indexAt( thr->viR, thr );
2890 pair.ec = main_get_EC( thr );
2891 tl_assert(pair.ec);
2892 tl_assert(thr->local_Krs_n_stacks);
2893
2894 /* check that we're not adding duplicates */
2895 nPresent = VG_(sizeXA)( thr->local_Krs_n_stacks );
2896
2897 /* Throw away old stacks, if necessary. We can't accumulate stuff
2898 indefinitely. */
2899 if (nPresent > 10000) {
2900 VG_(dropHeadXA)( thr->local_Krs_n_stacks, nPresent / 2 );
2901 nPresent = VG_(sizeXA)( thr->local_Krs_n_stacks );
2902 if (1)
2903 VG_(printf)("LOCAL Kr: thr %p, Kr %llu, ec %p (!!! gc !!!)\n",
2904 thr, pair.ull, pair.ec );
2905 }
2906
2907 if (nPresent > 0) {
2908 ULong_n_EC* prevPair
2909 = (ULong_n_EC*)VG_(indexXA)( thr->local_Krs_n_stacks, nPresent-1 );
2910 tl_assert( prevPair->ull < pair.ull );
2911 }
2912
2913 if (nPresent == 0)
2914 pair.ec = NULL;
2915
2916 VG_(addToXA)( thr->local_Krs_n_stacks, &pair );
2917
2918 if (0)
2919 VG_(printf)("LOCAL Kr: thr %p, Kr %llu, ec %p\n",
2920 thr, pair.ull, pair.ec );
2921 if (0)
2922 VG_(pp_ExeContext)(pair.ec);
2923}
2924
2925static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 )
2926{
2927 if (pair1->ull < pair2->ull) return -1;
2928 if (pair1->ull > pair2->ull) return 1;
2929 return 0;
2930}
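/* Illustrative sketch (not part of the original file): because the
   .ull fields of local_Krs_n_stacks are strictly increasing, a later
   lookup of a scalar Kr can use plain binary search. One plausible
   query -- the pair with the largest .ull <= kr ('find_ull_LE' is a
   hypothetical name):
*/
#if 0
static Word find_ull_LE ( ULong_n_EC* xs, Word n, ULong kr )
{
   Word lo = 0, hi = n-1, res = -1;
   while (lo <= hi) {
      Word mid = lo + (hi - lo) / 2;
      if (xs[mid].ull <= kr) { res = mid; lo = mid+1; }
      else                   { hi = mid-1; }
   }
   return res;  /* index of the pair, or -1 if all entries exceed kr */
}
#endif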
2931
sewardjf98e1c02008-10-25 16:22:41 +00002932
2933/////////////////////////////////////////////////////////
2934// //
2935// Shadow Values //
2936// //
2937/////////////////////////////////////////////////////////
2938
2939// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2940// hb_zsm.h. We have to do everything else here.
2941
2942/* SVal is 64 bit unsigned int.
2943
2944 <---------30---------> <---------30--------->
2945 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00002946 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00002947 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
2948
sewardjf98e1c02008-10-25 16:22:41 +00002949*/
2950#define SVAL_TAGMASK (3ULL << 62)
2951
2952static inline Bool SVal__isC ( SVal s ) {
2953 return (0ULL << 62) == (s & SVAL_TAGMASK);
2954}
2955static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2956 //tl_assert(VtsID__is_valid(rmini));
2957 //tl_assert(VtsID__is_valid(wmini));
2958 return (((ULong)rmini) << 32) | ((ULong)wmini);
2959}
2960static inline VtsID SVal__unC_Rmin ( SVal s ) {
2961 tl_assert(SVal__isC(s));
2962 return (VtsID)(s >> 32);
2963}
2964static inline VtsID SVal__unC_Wmin ( SVal s ) {
2965 tl_assert(SVal__isC(s));
2966 return (VtsID)(s & 0xFFFFFFFFULL);
2967}
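/* Illustrative example (not part of the original file): since the tag
   bits of a C-state SVal are 00, packing is a plain 32/32 split --
   SVal__mkC(0x123, 0x456) == 0x0000012300000456ULL, from which
   SVal__unC_Rmin and SVal__unC_Wmin recover 0x123 and 0x456. */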
2968
sewardj23f12002009-07-24 08:45:08 +00002969static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00002970 return (2ULL << 62) == (s & SVAL_TAGMASK);
2971}
sewardj23f12002009-07-24 08:45:08 +00002972static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00002973 return 2ULL << 62;
2974}
2975
2976/* Direct callback from lib_zsm. */
2977static void SVal__rcinc ( SVal s ) {
2978 if (SVal__isC(s)) {
2979 VtsID__rcinc( SVal__unC_Rmin(s) );
2980 VtsID__rcinc( SVal__unC_Wmin(s) );
2981 }
2982}
2983
2984/* Direct callback from lib_zsm. */
2985static void SVal__rcdec ( SVal s ) {
2986 if (SVal__isC(s)) {
2987 VtsID__rcdec( SVal__unC_Rmin(s) );
2988 VtsID__rcdec( SVal__unC_Wmin(s) );
2989 }
2990}
2991
2992
2993/////////////////////////////////////////////////////////
2994// //
sewardjd86e3a22008-12-03 11:39:37 +00002995// A simple group (memory) allocator //
2996// //
2997/////////////////////////////////////////////////////////
2998
2999//////////////// BEGIN general group allocator
3000typedef
3001 struct {
3002 UWord elemSzB; /* element size */
3003 UWord nPerGroup; /* # elems per group */
3004 void* (*alloc)(HChar*, SizeT); /* group allocator */
3005 HChar* cc; /* group allocator's cc */
3006 void (*free)(void*); /* group allocator's free-er (unused) */
3007 /* XArray of void* (pointers to groups). The groups themselves.
3008 Each element is a pointer to a block of size (elemSzB *
3009 nPerGroup) bytes. */
3010 XArray* groups;
3011 /* next free element. Is a pointer to an element in one of the
3012 groups pointed to by .groups. */
3013 void* nextFree;
3014 }
3015 GroupAlloc;
3016
3017static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
3018 UWord elemSzB,
3019 UWord nPerGroup,
3020 void* (*alloc)(HChar*, SizeT),
3021 HChar* cc,
3022 void (*free)(void*) )
3023{
3024 tl_assert(0 == (elemSzB % sizeof(UWord)));
3025 tl_assert(elemSzB >= sizeof(UWord));
3026 tl_assert(nPerGroup >= 100); /* let's say */
3027 tl_assert(alloc);
3028 tl_assert(cc);
3029 tl_assert(free);
3030 tl_assert(ga);
3031 VG_(memset)(ga, 0, sizeof(*ga));
3032 ga->elemSzB = elemSzB;
3033 ga->nPerGroup = nPerGroup;
3034 ga->groups = NULL;
3035 ga->alloc = alloc;
3036 ga->cc = cc;
3037 ga->free = free;
3038 ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
3039 ga->nextFree = NULL;
3040 tl_assert(ga->groups);
3041}
3042
3043/* The freelist is empty. Allocate a new group and put all the new
3044 elements in it onto the freelist. */
3045__attribute__((noinline))
3046static void gal_add_new_group ( GroupAlloc* ga )
3047{
3048 Word i;
3049 UWord* group;
3050 tl_assert(ga);
3051 tl_assert(ga->nextFree == NULL);
3052 group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
3053 tl_assert(group);
3054 /* extend the freelist through the new group. Place the freelist
3055 pointer in the first word of each element. That's why the
3056 element size must be at least one word. */
3057 for (i = ga->nPerGroup-1; i >= 0; i--) {
3058 UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
3059 UWord* elem = (UWord*)elemC;
3060 tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
3061 *elem = (UWord)ga->nextFree;
3062 ga->nextFree = elem;
3063 }
3064 /* and add to our collection of groups */
3065 VG_(addToXA)( ga->groups, &group );
3066}
3067
3068inline static void* gal_Alloc ( GroupAlloc* ga )
3069{
3070 UWord* elem;
3071 if (UNLIKELY(ga->nextFree == NULL)) {
3072 gal_add_new_group(ga);
3073 }
3074 elem = ga->nextFree;
3075 ga->nextFree = (void*)*elem;
3076 *elem = 0; /* unnecessary, but just to be on the safe side */
3077 return elem;
3078}
3079
3080inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
3081{
3082 tl_assert(n == ga->elemSzB);
3083 return gal_Alloc( ga );
3084}
3085
3086inline static void gal_Free ( GroupAlloc* ga, void* p )
3087{
3088 UWord* elem = (UWord*)p;
3089 *elem = (UWord)ga->nextFree;
3090 ga->nextFree = elem;
3091}
3092//////////////// END general group allocator
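/* Illustrative usage (not part of the original file): a client embeds
   one GroupAlloc per fixed-size object type, mirroring the RCEC and
   OldRef allocators below ('Thingy' is hypothetical):
*/
#if 0
static GroupAlloc thingy_group_allocator;
static void init_thingy_pool ( void )
{
   init_GroupAlloc( &thingy_group_allocator, sizeof(Thingy),
                    1000/*per group*/, HG_(zalloc),
                    "libhb.thingy_pool", HG_(free) );
}
static Thingy* alloc_Thingy ( void ) {
   return gal_Alloc( &thingy_group_allocator );
}
static void free_Thingy ( Thingy* t ) {
   gal_Free( &thingy_group_allocator, t );
}
#endif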
3093
3094
3095/////////////////////////////////////////////////////////
3096// //
sewardjf98e1c02008-10-25 16:22:41 +00003097// Change-event map2 //
3098// //
3099/////////////////////////////////////////////////////////
3100
sewardjf98e1c02008-10-25 16:22:41 +00003101#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
3102
3103/* This is in two parts:
3104
sewardj23f12002009-07-24 08:45:08 +00003105 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00003106 traces. When the reference count of a stack trace becomes zero,
3107 it is removed from the set and freed up. The intent is to have
3108 a set of stack traces which can be referred to from (2), but to
3109 only represent each one once. The set is indexed/searched by
3110 ordering on the stack trace vectors.
3111
sewardj849b0ed2008-12-21 10:43:10 +00003112 2. A SparseWA of OldRefs. These store information about each old
3113 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00003114 location for which the information is recorded. For LRU
3115 purposes, each OldRef also contains a generation number,
3116 indicating when it was most recently accessed.
3117
3118 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00003119 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
3120 size) triples to RCECs. This allows us to collect the last
3121 access-traceback by up to N_OLDREF_ACCS different triples for
3122 this location. The accs[] array is a MTF-array. If a binding
3123 falls off the end, that's too bad -- we will lose info about
3124 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00003125
sewardj849b0ed2008-12-21 10:43:10 +00003126 When the SparseWA becomes too big, we can throw away the OldRefs
sewardjf98e1c02008-10-25 16:22:41 +00003127 whose generation numbers are below some threshold; hence doing
3128 approximate LRU discarding. For each discarded OldRef we must
3129 of course decrement the reference count on the all RCECs it
3130 refers to, in order that entries from (1) eventually get
3131 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00003132
3133 A major improvement in reliability of this mechanism would be to
3134 have a dynamically sized OldRef.accs[] array, so no entries ever
3135 fall off the end. In investigations (Dec 08) it appears that a
3136 major cause for the non-availability of conflicting-access traces
3137 in race reports is caused by the fixed size of this array. I
3138 suspect for most OldRefs, only a few entries are used, but for a
3139 minority of cases there is an overflow, leading to info lossage.
3140 Investigations also suggest this is very workload and scheduling
3141 sensitive. Therefore a dynamic sizing would be better.
3142
3143 However, dynamic sizing would defeat the use of a GroupAllocator
3144 for OldRef structures. And that's important for performance. So
3145 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00003146*/
3147
3148
3149static UWord stats__ctxt_rcdec1 = 0;
3150static UWord stats__ctxt_rcdec2 = 0;
3151static UWord stats__ctxt_rcdec3 = 0;
3152static UWord stats__ctxt_rcdec_calls = 0;
3153static UWord stats__ctxt_rcdec_discards = 0;
3154static UWord stats__ctxt_rcdec1_eq = 0;
3155
3156static UWord stats__ctxt_tab_curr = 0;
3157static UWord stats__ctxt_tab_max = 0;
3158
3159static UWord stats__ctxt_tab_qs = 0;
3160static UWord stats__ctxt_tab_cmps = 0;
3161
3162
3163///////////////////////////////////////////////////////
3164//// Part (1): An OSet of RCECs
3165///
3166
3167#define N_FRAMES 8
3168
3169// (UInt) `echo "Reference Counted Execution Context" | md5sum`
3170#define RCEC_MAGIC 0xab88abb2UL
3171
3172//#define N_RCEC_TAB 98317 /* prime */
3173#define N_RCEC_TAB 196613 /* prime */
3174
3175typedef
3176 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00003177 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003178 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00003179 UWord rc;
3180 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00003181 UWord frames_hash; /* hash of all the frames */
3182 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00003183 }
3184 RCEC;
3185
3186static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3187
3188
3189/* Gives an arbitrary total order on RCEC .frames fields */
3190static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3191 Word i;
3192 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3193 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00003194 if (ec1->frames_hash < ec2->frames_hash) return -1;
3195 if (ec1->frames_hash > ec2->frames_hash) return 1;
3196 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003197 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00003198 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00003199 }
3200 return 0;
3201}
3202
3203
3204/* Dec the ref of this RCEC. */
3205static void ctxt__rcdec ( RCEC* ec )
3206{
3207 stats__ctxt_rcdec_calls++;
3208 tl_assert(ec && ec->magic == RCEC_MAGIC);
3209 tl_assert(ec->rc > 0);
3210 ec->rc--;
3211}
3212
3213static void ctxt__rcinc ( RCEC* ec )
3214{
3215 tl_assert(ec && ec->magic == RCEC_MAGIC);
3216 ec->rc++;
3217}
3218
3219
sewardjd86e3a22008-12-03 11:39:37 +00003220//////////// BEGIN RCEC group allocator
3221static GroupAlloc rcec_group_allocator;
3222
3223static RCEC* alloc_RCEC ( void ) {
3224 return gal_Alloc ( &rcec_group_allocator );
3225}
3226
3227static void free_RCEC ( RCEC* rcec ) {
3228 tl_assert(rcec->magic == RCEC_MAGIC);
3229 gal_Free( &rcec_group_allocator, rcec );
3230}
3231//////////// END OldRef group allocator
3232
3233
sewardjf98e1c02008-10-25 16:22:41 +00003234/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
 3235 move it one step closer to the front of the list, so as to make
3236 subsequent searches for it cheaper. */
3237static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3238{
3239 RCEC *ec0, *ec1, *ec2;
3240 if (ec == *headp)
3241 tl_assert(0); /* already at head of list */
3242 tl_assert(ec != NULL);
3243 ec0 = *headp;
3244 ec1 = NULL;
3245 ec2 = NULL;
3246 while (True) {
3247 if (ec0 == NULL || ec0 == ec) break;
3248 ec2 = ec1;
3249 ec1 = ec0;
3250 ec0 = ec0->next;
3251 }
3252 tl_assert(ec0 == ec);
3253 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3254 RCEC* tmp;
3255 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3256 predecessor. Swap ec0 and ec1, that is, move ec0 one step
3257 closer to the start of the list. */
3258 tl_assert(ec2->next == ec1);
3259 tl_assert(ec1->next == ec0);
3260 tmp = ec0->next;
3261 ec2->next = ec0;
3262 ec0->next = ec1;
3263 ec1->next = tmp;
3264 }
3265 else
3266 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3267 /* it's second in the list. */
3268 tl_assert(*headp == ec1);
3269 tl_assert(ec1->next == ec0);
3270 ec1->next = ec0->next;
3271 ec0->next = ec1;
3272 *headp = ec0;
3273 }
3274}
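/* Illustrative note (not part of the original file): this is the
   "transpose" self-organising-list heuristic. In a chain
   A -> B -> C -> D, a successful search for C swaps it with its
   predecessor, giving A -> C -> B -> D, so frequently sought RCECs
   migrate gradually towards the head of their hash chain. */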
3275
3276
3277/* Find the given RCEC in the tree, and return a pointer to it. Or,
3278 if not present, add the given one to the tree (by making a copy of
3279 it, so the caller can immediately deallocate the original) and
3280 return a pointer to the copy. The caller can safely have 'example'
3281 on its stack, since we will always return a pointer to a copy of
3282 it, not to the original. Note that the inserted node will have .rc
3283 of zero and so the caller must immediatly increment it. */
3284__attribute__((noinline))
3285static RCEC* ctxt__find_or_add ( RCEC* example )
3286{
3287 UWord hent;
3288 RCEC* copy;
3289 tl_assert(example && example->magic == RCEC_MAGIC);
3290 tl_assert(example->rc == 0);
3291
3292 /* Search the hash table to see if we already have it. */
3293 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00003294 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00003295 copy = contextTab[hent];
3296 while (1) {
3297 if (!copy) break;
3298 tl_assert(copy->magic == RCEC_MAGIC);
3299 stats__ctxt_tab_cmps++;
3300 if (0 == RCEC__cmp_by_frames(copy, example)) break;
3301 copy = copy->next;
3302 }
3303
3304 if (copy) {
3305 tl_assert(copy != example);
3306 /* optimisation: if it's not at the head of its list, move 1
3307 step fwds, to make future searches cheaper */
3308 if (copy != contextTab[hent]) {
3309 move_RCEC_one_step_forward( &contextTab[hent], copy );
3310 }
3311 } else {
sewardjd86e3a22008-12-03 11:39:37 +00003312 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00003313 tl_assert(copy != example);
3314 *copy = *example;
3315 copy->next = contextTab[hent];
3316 contextTab[hent] = copy;
3317 stats__ctxt_tab_curr++;
3318 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
3319 stats__ctxt_tab_max = stats__ctxt_tab_curr;
3320 }
3321 return copy;
3322}
3323
3324static inline UWord ROLW ( UWord w, Int n )
3325{
3326 Int bpw = 8 * sizeof(UWord);
3327 w = (w << n) | (w >> (bpw-n));
3328 return w;
3329}
3330
3331__attribute__((noinline))
3332static RCEC* get_RCEC ( Thr* thr )
3333{
3334 UWord hash, i;
3335 RCEC example;
3336 example.magic = RCEC_MAGIC;
3337 example.rc = 0;
3338 example.rcX = 0;
njn6c83d5e2009-05-05 23:46:24 +00003339 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00003340 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00003341 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003342 hash ^= example.frames[i];
3343 hash = ROLW(hash, 19);
3344 }
njn6c83d5e2009-05-05 23:46:24 +00003345 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00003346 return ctxt__find_or_add( &example );
3347}
3348
3349///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00003350//// Part (2):
3351/// A SparseWA mapping guest-addr -> OldRef, which refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00003352///
3353
3354// (UInt) `echo "Old Reference Information" | md5sum`
3355#define OldRef_MAGIC 0x30b1f075UL
3356
sewardjc5ea9962008-12-07 01:41:46 +00003357/* Records an access: a thread and a context. The size
3358 (1,2,4,8) and read-or-writeness are also encoded as
3359 follows: bottom bit of .thr is 1 if write, 0 if read
3360 bottom 2 bits of .rcec encode the size:
3361 00 = 1, 01 = 2, 10 = 4, 11 = 8
3362*/
sewardjf98e1c02008-10-25 16:22:41 +00003363typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
3364
sewardj849b0ed2008-12-21 10:43:10 +00003365#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00003366
3367typedef
3368 struct {
sewardjd86e3a22008-12-03 11:39:37 +00003369 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003370 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00003371 /* or free list when not in use */
sewardjf98e1c02008-10-25 16:22:41 +00003372 /* unused slots in this array have .thr == NULL */
3373 Thr_n_RCEC accs[N_OLDREF_ACCS];
3374 }
3375 OldRef;
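
/* The accs[] array is kept in approximately most-recently-used
   order: event_map_bind (below) swaps a matching entry one slot
   towards the front on each hit, and on a miss shuffles all entries
   down one slot, installing the new access at accs[0] and evicting
   the one previously in accs[N_OLDREF_ACCS-1]. */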
3376
sewardjd86e3a22008-12-03 11:39:37 +00003377
3378//////////// BEGIN OldRef group allocator
3379static GroupAlloc oldref_group_allocator;
3380
3381static OldRef* alloc_OldRef ( void ) {
3382 return gal_Alloc ( &oldref_group_allocator );
3383}
3384
3385static void free_OldRef ( OldRef* r ) {
3386 tl_assert(r->magic == OldRef_MAGIC);
3387 gal_Free( &oldref_group_allocator, r );
3388}
3389//////////// END OldRef group allocator
3390
sewardjd86e3a22008-12-03 11:39:37 +00003391
sewardjbc307e52008-12-06 22:10:54 +00003392static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
3393static UWord oldrefGen = 0; /* current LRU generation # */
3394static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
3395static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00003396
sewardjc5ea9962008-12-07 01:41:46 +00003397inline static void* ptr_or_UWord ( void* p, UWord w ) {
3398 return (void*)( ((UWord)p) | ((UWord)w) );
3399}
3400inline static void* ptr_and_UWord ( void* p, UWord w ) {
3401 return (void*)( ((UWord)p) & ((UWord)w) );
3402}
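
/* A minimal sketch, kept disabled, of the pointer-tagging round trip
   used by event_map_bind and libhb_event_map_lookup below.  It
   assumes, as the real code does, that Thr* and RCEC* values are at
   least 4-aligned, leaving the bottom two bits free.
   example__tag_roundtrip is purely illustrative and not part of
   libhb. */
#if 0
static void example__tag_roundtrip ( Thr* thr, RCEC* rcec )
{
   /* Encode: a 4-byte write access. */
   Thr* thrW = ptr_or_UWord(thr, 1); /* bottom bit 1 => write */
   RCEC* rcec4 = ptr_or_UWord(rcec, 2); /* binary 10 => size 4 */
   /* Decode: mask off the tag bits to recover the pointers, and
      inspect the tag bits to recover the access properties. */
   tl_assert( ptr_and_UWord(thrW, ~3) == thr );
   tl_assert( 1 == (UWord)ptr_and_UWord(thrW, 1) ); /* it's a write */
   tl_assert( 2 == (UWord)ptr_and_UWord(rcec4, 3) ); /* size code for 4 */
}
#endif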
3403
sewardj1669cc72008-12-13 01:20:21 +00003404inline static UInt min_UInt ( UInt a, UInt b ) {
3405 return a < b ? a : b;
3406}
3407
sewardja781be62008-12-08 00:12:28 +00003408/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
3409 first interval is lower, 1 if the first interval is higher, and 0
3410 if there is any overlap. The seemingly redundant casts are paranoia,
3411 added after what looked distinctly like a bug in gcc-4.1.2, in which
3412 some of the comparisons were done signedly instead of
3413 unsignedly. */
3414/* Copied from exp-ptrcheck/sg_main.c */
3415static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
3416 Addr a2, SizeT n2 ) {
3417 UWord a1w = (UWord)a1;
3418 UWord n1w = (UWord)n1;
3419 UWord a2w = (UWord)a2;
3420 UWord n2w = (UWord)n2;
3421 tl_assert(n1w > 0 && n2w > 0);
3422 if (a1w + n1w <= a2w) return -1L;
3423 if (a2w + n2w <= a1w) return 1L;
3424 return 0;
3425}
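
/* For example, [0x1000,0x1004) vs [0x1004,0x1008) returns -1 (the
   intervals are disjoint and the first is lower), whilst
   [0x1000,0x1008) vs [0x1002,0x1004) returns 0, since the first
   interval contains the second. */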
3426
sewardjc5ea9962008-12-07 01:41:46 +00003427static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00003428{
sewardjd86e3a22008-12-03 11:39:37 +00003429 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00003430 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00003431 Word i, j;
3432 UWord keyW, valW;
3433 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003434
sewardjc5ea9962008-12-07 01:41:46 +00003435 rcec = get_RCEC( thr );
3436 ctxt__rcinc(rcec);
3437
3438 /* encode the size and writeness of the transaction in the bottom
3439 two bits of thr and rcec. */
3440 thr = ptr_or_UWord(thr, isW ? 1 : 0);
3441 switch (szB) {
3442 /* This doesn't look particularly branch-predictor friendly. */
3443 case 1: rcec = ptr_or_UWord(rcec, 0); break;
3444 case 2: rcec = ptr_or_UWord(rcec, 1); break;
3445 case 4: rcec = ptr_or_UWord(rcec, 2); break;
3446 case 8: rcec = ptr_or_UWord(rcec, 3); break;
3447 default: tl_assert(0);
3448 }
3449
3450 /* Look in the map to see if we already have this. */
sewardjbc307e52008-12-06 22:10:54 +00003451 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00003452
sewardjd86e3a22008-12-03 11:39:37 +00003453 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00003454
3455 /* We already have a record for this address. We now need to
sewardj849b0ed2008-12-21 10:43:10 +00003456 see if we have a stack trace pertaining to this (thread, R/W,
3457 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00003458 tl_assert(keyW == a);
3459 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003460 tl_assert(ref->magic == OldRef_MAGIC);
3461
3462 tl_assert(thr);
3463 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardj849b0ed2008-12-21 10:43:10 +00003464 if (ref->accs[i].thr != thr)
3465 continue;
3466 /* since .thr encodes both the accessing thread and the
3467 read/writeness, we know now that at least those features
3468 of the access match this entry. So we just need to check
3469 the size indication. Do this by inspecting the lowest 2 bits of
3470 .rcec, which contain the encoded size info. */
3471 if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
3472 continue;
3473 /* else we have a match, so stop looking. */
3474 break;
sewardjf98e1c02008-10-25 16:22:41 +00003475 }
3476
3477 if (i < N_OLDREF_ACCS) {
3478 /* thread 'thr' has an entry at index 'i'. Update it. */
3479 if (i > 0) {
3480 Thr_n_RCEC tmp = ref->accs[i-1];
3481 ref->accs[i-1] = ref->accs[i];
3482 ref->accs[i] = tmp;
3483 i--;
3484 }
sewardjc5ea9962008-12-07 01:41:46 +00003485 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00003486 stats__ctxt_rcdec1++;
sewardjc5ea9962008-12-07 01:41:46 +00003487 ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
3488 ref->accs[i].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003489 tl_assert(ref->accs[i].thr == thr);
3490 } else {
sewardj849b0ed2008-12-21 10:43:10 +00003491 /* No entry for this (thread, R/W, size) triple. Shuffle all
3492 of them down one slot, and put the new entry at the start
3493 of the array. */
sewardjf98e1c02008-10-25 16:22:41 +00003494 if (ref->accs[N_OLDREF_ACCS-1].thr) {
3495 /* the last slot is in use. We must dec the rc on the
3496 associated rcec. */
3497 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
3498 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00003499 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
3500 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjc5ea9962008-12-07 01:41:46 +00003501 ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
sewardjf98e1c02008-10-25 16:22:41 +00003502 } else {
3503 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
3504 }
3505 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
3506 ref->accs[j] = ref->accs[j-1];
3507 ref->accs[0].thr = thr;
sewardjc5ea9962008-12-07 01:41:46 +00003508 ref->accs[0].rcec = rcec;
3509 /* thr==NULL is used to signify an empty slot, so we can't
3510 add a NULL thr. */
3511 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003512 }
3513
3514 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003515
3516 } else {
3517
3518 /* We don't have a record for this address. Create a new one. */
3519 if (oldrefTreeN >= oldrefGenIncAt) {
3520 oldrefGen++;
3521 oldrefGenIncAt = oldrefTreeN + 50000;
3522 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3523 oldrefGen, oldrefTreeN );
3524 }
sewardjd86e3a22008-12-03 11:39:37 +00003525
3526 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00003527 ref->magic = OldRef_MAGIC;
3528 ref->gen = oldrefGen;
sewardjc5ea9962008-12-07 01:41:46 +00003529 ref->accs[0].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003530 ref->accs[0].thr = thr;
sewardj849b0ed2008-12-21 10:43:10 +00003531 /* thr==NULL is used to signify an empty slot, so we can't add a
3532 NULL thr. */
3533 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003534 for (j = 1; j < N_OLDREF_ACCS; j++) {
3535 ref->accs[j].thr = NULL;
3536 ref->accs[j].rcec = NULL;
3537 }
sewardjbc307e52008-12-06 22:10:54 +00003538 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00003539 oldrefTreeN++;
3540
3541 }
3542}
3543
3544
sewardjc5ea9962008-12-07 01:41:46 +00003545Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
3546 /*OUT*/Thr** resThr,
3547 /*OUT*/SizeT* resSzB,
3548 /*OUT*/Bool* resIsW,
3549 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00003550{
sewardja781be62008-12-08 00:12:28 +00003551 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00003552 OldRef* ref;
3553 UWord keyW, valW;
3554 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003555
sewardjc5ea9962008-12-07 01:41:46 +00003556 Thr* cand_thr;
3557 RCEC* cand_rcec;
3558 Bool cand_isW;
3559 SizeT cand_szB;
sewardja781be62008-12-08 00:12:28 +00003560 Addr cand_a;
3561
3562 Addr toCheck[15];
3563 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00003564
3565 tl_assert(thr);
3566 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00003567
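   /* Build the set of addresses whose OldRef entries could describe
      an access overlapping [a, a+szB).  Since the largest recorded
      access is 8 bytes, any overlapping access must start somewhere
      in [a-7, a+szB-1], giving at most 7 + 8 == 15 candidates. */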
sewardja781be62008-12-08 00:12:28 +00003568 toCheck[nToCheck++] = a;
3569 for (i = -7; i < (Word)szB; i++) {
3570 if (i != 0)
3571 toCheck[nToCheck++] = a + i;
3572 }
3573 tl_assert(nToCheck <= 15);
3574
3575 /* Now see if we can find a suitable matching event for
3576 any of the addresses in toCheck[0 .. nToCheck-1]. */
3577 for (j = 0; j < nToCheck; j++) {
3578
3579 cand_a = toCheck[j];
3580 // VG_(printf)("test %ld %p\n", j, cand_a);
3581
3582 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
3583 if (!b)
3584 continue;
3585
sewardjd86e3a22008-12-03 11:39:37 +00003586 ref = (OldRef*)valW;
sewardja781be62008-12-08 00:12:28 +00003587 tl_assert(keyW == cand_a);
sewardjf98e1c02008-10-25 16:22:41 +00003588 tl_assert(ref->magic == OldRef_MAGIC);
3589 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3590
sewardjc5ea9962008-12-07 01:41:46 +00003591 cand_thr = NULL;
3592 cand_rcec = NULL;
3593 cand_isW = False;
3594 cand_szB = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003595
sewardjc5ea9962008-12-07 01:41:46 +00003596 for (i = 0; i < N_OLDREF_ACCS; i++) {
3597 Thr_n_RCEC* cand = &ref->accs[i];
3598 cand_thr = ptr_and_UWord(cand->thr, ~3);
3599 cand_rcec = ptr_and_UWord(cand->rcec, ~3);
3600 /* Decode the writeness from the bottom bit of .thr. */
3601 cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
3602 /* Decode the size from the bottom two bits of .rcec. */
3603 switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
3604 case 0: cand_szB = 1; break;
3605 case 1: cand_szB = 2; break;
3606 case 2: cand_szB = 4; break;
3607 case 3: cand_szB = 8; break;
3608 default: tl_assert(0);
3609 }
3610
3611 if (cand_thr == NULL)
3612 /* This slot isn't in use. Ignore it. */
3613 continue;
3614
3615 if (cand_thr == thr)
3616 /* This is an access by the same thread, but we're only
3617 interested in accesses from other threads. Ignore. */
3618 continue;
3619
3620 if ((!cand_isW) && (!isW))
3621 /* We don't want to report a read racing against another
3622 read; that's stupid. So in this case move on. */
3623 continue;
3624
sewardja781be62008-12-08 00:12:28 +00003625 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
3626 /* No overlap with the access we're asking about. Ignore. */
3627 continue;
3628
sewardjc5ea9962008-12-07 01:41:46 +00003629 /* We have a match. Stop searching. */
3630 break;
3631 }
3632
3633 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
3634
sewardja781be62008-12-08 00:12:28 +00003635 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00003636 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00003637 /* return with success */
3638 tl_assert(cand_thr);
3639 tl_assert(cand_rcec);
3640 tl_assert(cand_rcec->magic == RCEC_MAGIC);
3641 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00003642 /* Count how many non-zero frames we have. */
3643 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
3644 for (n = 0; n < maxNFrames; n++) {
3645 if (0 == cand_rcec->frames[n]) break;
3646 }
3647 *resEC = VG_(make_ExeContext_from_StackTrace)(cand_rcec->frames, n);
sewardja781be62008-12-08 00:12:28 +00003648 *resThr = cand_thr;
3649 *resSzB = cand_szB;
3650 *resIsW = cand_isW;
3651 return True;
3652 }
sewardjc5ea9962008-12-07 01:41:46 +00003653
sewardja781be62008-12-08 00:12:28 +00003654 /* consider next address in toCheck[] */
3655 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00003656
sewardja781be62008-12-08 00:12:28 +00003657 /* really didn't find anything. */
3658 return False;
sewardjf98e1c02008-10-25 16:22:41 +00003659}
3660
3661static void event_map_init ( void )
3662{
3663 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00003664
3665 /* Context (RCEC) group allocator */
3666 init_GroupAlloc ( &rcec_group_allocator,
3667 sizeof(RCEC),
3668 1000 /* RCECs per group */,
3669 HG_(zalloc),
3670 "libhb.event_map_init.1 (RCEC groups)",
3671 HG_(free) );
3672
3673 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00003674 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00003675 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00003676 N_RCEC_TAB * sizeof(RCEC*) );
3677 tl_assert(contextTab);
3678 for (i = 0; i < N_RCEC_TAB; i++)
3679 contextTab[i] = NULL;
3680
sewardjd86e3a22008-12-03 11:39:37 +00003681 /* Oldref group allocator */
3682 init_GroupAlloc ( &oldref_group_allocator,
3683 sizeof(OldRef),
3684 1000 /* OldRefs per group */,
3685 HG_(zalloc),
3686 "libhb.event_map_init.3 (OldRef groups)",
3687 HG_(free) );
3688
sewardjd86e3a22008-12-03 11:39:37 +00003689 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00003690 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00003691 oldrefTree = VG_(newSWA)(
3692 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00003693 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00003694 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00003695 );
3696 tl_assert(oldrefTree);
3697
3698 oldrefGen = 0;
3699 oldrefGenIncAt = 0;
3700 oldrefTreeN = 0;
3701}
3702
3703static void event_map__check_reference_counts ( Bool before )
3704{
3705 RCEC* rcec;
3706 OldRef* oldref;
3707 Word i;
3708 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00003709 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00003710
3711 /* Set the 'check' reference counts to zero. Also, optionally
3712 check that the real reference counts are non-zero. We allow
3713 these to fall to zero before a GC, but the GC must get rid of
3714 all those that are zero, hence none should be zero after a
3715 GC. */
3716 for (i = 0; i < N_RCEC_TAB; i++) {
3717 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3718 nEnts++;
3719 tl_assert(rcec);
3720 tl_assert(rcec->magic == RCEC_MAGIC);
3721 if (!before)
3722 tl_assert(rcec->rc > 0);
3723 rcec->rcX = 0;
3724 }
3725 }
3726
3727 /* check that the stats are sane */
3728 tl_assert(nEnts == stats__ctxt_tab_curr);
3729 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3730
3731 /* visit all the referencing points, incrementing the check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00003732 VG_(initIterSWA)( oldrefTree );
3733 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003734 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003735 tl_assert(oldref->magic == OldRef_MAGIC);
3736 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjc5ea9962008-12-07 01:41:46 +00003737 Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
3738 RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
3739 if (aThr) {
3740 tl_assert(aRef);
3741 tl_assert(aRef->magic == RCEC_MAGIC);
3742 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00003743 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003744 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00003745 }
3746 }
3747 }
3748
3749 /* compare check ref counts with actual */
3750 for (i = 0; i < N_RCEC_TAB; i++) {
3751 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3752 tl_assert(rcec->rc == rcec->rcX);
3753 }
3754 }
3755}
3756
sewardj8fd92d32008-11-20 23:17:01 +00003757__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003758static void event_map_maybe_GC ( void )
3759{
3760 OldRef* oldref;
3761 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003762 XArray* refs2del;
3763 Word i, j, n2del;
3764
sewardj8fd92d32008-11-20 23:17:01 +00003765 UWord* genMap = NULL;
3766 UWord genMap_min = 0;
3767 UWord genMap_size = 0;
3768
sewardj849b0ed2008-12-21 10:43:10 +00003769 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
sewardjf98e1c02008-10-25 16:22:41 +00003770 return;
3771
3772 if (0)
3773 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3774
sewardj849b0ed2008-12-21 10:43:10 +00003775 /* Check for sane command line params. Limit values must match
3776 those in hg_process_cmd_line_option. */
3777 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
3778 tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 );
3779
sewardj8f5374e2008-12-07 11:40:17 +00003780 /* Check our counting is sane (expensive) */
3781 if (CHECK_CEM)
3782 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00003783
sewardj8f5374e2008-12-07 11:40:17 +00003784 /* Check the reference counts (expensive) */
3785 if (CHECK_CEM)
3786 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003787
sewardj8fd92d32008-11-20 23:17:01 +00003788 /* Compute the distribution of generation values in the ref tree.
3789 There are likely only to be a few different generation numbers
3790 in the whole tree, but we don't know what they are. Hence use a
3791 dynamically resized array of counters. The array is genMap[0
3792 .. genMap_size-1], where genMap[0] is the count for the
3793 generation number genMap_min, genMap[1] is the count for
3794 genMap_min+1, etc. If a new number is seen outside the range
3795 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3796 copied into a larger array, and genMap_min and genMap_size are
3797 adjusted accordingly. */
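
   /* For example: if the only generations seen so far are 7 and 9,
      then genMap_min == 7, genMap_size == 3, and genMap[] holds the
      counts for generations 7, 8 and 9.  Seeing generation 5 next
      would extend the array downwards, giving genMap_min == 5 and
      genMap_size == 5. */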
3798
sewardjf98e1c02008-10-25 16:22:41 +00003799 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00003800
sewardjbc307e52008-12-06 22:10:54 +00003801 VG_(initIterSWA)( oldrefTree );
3802 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00003803
sewardjd86e3a22008-12-03 11:39:37 +00003804 UWord ea, key;
3805 oldref = (OldRef*)valW;
3806 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003807
3808 /* BEGIN find 'ea', which is the index in genMap holding the
3809 count for generation number 'key'. */
3810 if (UNLIKELY(genMap == NULL)) {
3811 /* deal with the first key to be seen, so that the following
3812 cases don't need to handle the complexity of a NULL count
3813 array. */
3814 genMap_min = key;
3815 genMap_size = 1;
3816 genMap = HG_(zalloc)( "libhb.emmG.1a",
3817 genMap_size * sizeof(UWord) );
3818 ea = 0;
3819 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
3820 key, genMap_min, genMap_min+genMap_size-1 );
sewardjf98e1c02008-10-25 16:22:41 +00003821 }
sewardj8fd92d32008-11-20 23:17:01 +00003822 else
3823 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3824 /* this is the expected (almost-always-happens) case: 'key'
3825 is already mapped in the array. */
3826 ea = key - genMap_min;
3827 }
3828 else
3829 if (key < genMap_min) {
3830 /* 'key' appears before the start of the current array.
3831 Extend the current array by allocating a larger one and
3832 copying the current one to the upper end of it. */
3833 Word more;
3834 UWord* map2;
3835 more = genMap_min - key;
3836 tl_assert(more > 0);
3837 map2 = HG_(zalloc)( "libhb.emmG.1b",
3838 (genMap_size + more) * sizeof(UWord) );
3839 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3840 HG_(free)( genMap );
3841 genMap = map2;
3842 genMap_size += more;
3843 genMap_min -= more;
3844 ea = 0;
3845 tl_assert(genMap_min == key);
3846 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
3847 key, genMap_min, genMap_min+genMap_size-1 );
3848 }
3849 else {
3850 /* 'key' appears after the end of the current array. Extend
3851 the current array by allocating a larger one and copying
3852 the current one to the lower end of it. */
3853 Word more;
3854 UWord* map2;
3855 tl_assert(key >= genMap_min + genMap_size);
3856 more = key - (genMap_min + genMap_size) + 1;
3857 tl_assert(more > 0);
3858 map2 = HG_(zalloc)( "libhb.emmG.1c",
3859 (genMap_size + more) * sizeof(UWord) );
3860 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3861 HG_(free)( genMap );
3862 genMap = map2;
3863 genMap_size += more;
3864 ea = genMap_size - 1;
3865 tl_assert(genMap_min + genMap_size - 1 == key);
3866 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
3867 key, genMap_min, genMap_min+genMap_size-1 );
3868 }
3869 /* END find 'ea' from 'key' */
3870
3871 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00003872 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00003873 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003874 }
3875
sewardj8fd92d32008-11-20 23:17:01 +00003876 tl_assert(genMap);
3877 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003878
sewardj8fd92d32008-11-20 23:17:01 +00003879 /* Sanity check what we just computed */
3880 { UWord sum = 0;
3881 for (i = 0; i < genMap_size; i++) {
3882 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3883 i + genMap_min, genMap[i] );
3884 sum += genMap[i];
3885 }
3886 tl_assert(sum == oldrefTreeN);
3887 }
3888
3889 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00003890 retained = oldrefTreeN;
3891 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003892
3893 for (i = 0; i < genMap_size; i++) {
3894 keyW = i + genMap_min;
3895 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003896 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3897 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3898 tl_assert(keyW >= maxGen);
3899 tl_assert(retained >= valW);
3900 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00003901 > (UWord)(HG_(clo_conflict_cache_size)
3902 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00003903 retained -= valW;
3904 maxGen = keyW;
3905 } else {
3906 break;
3907 }
3908 }
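
   /* At this point the generations have been considered oldest
      first, and a generation is marked for discard only while doing
      so still leaves more than
      HG_(clo_conflict_cache_size) * EVENT_MAP_GC_DISCARD_FRACTION
      entries retained.  'maxGen' is now the newest generation to be
      discarded (or 0 if none is), and 'retained' the resulting tree
      size. */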
sewardjf98e1c02008-10-25 16:22:41 +00003909
sewardj8fd92d32008-11-20 23:17:01 +00003910 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003911
sewardj9b1f0fd2008-11-18 23:40:00 +00003912 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003913
3914 /* Now make up a big list of the oldrefTree entries we want to
3915 delete. We can't simultaneously traverse the tree and delete
3916 stuff from it, so first we need to copy them off somewhere
3917 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003918 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00003919 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00003920
sewardj9b1f0fd2008-11-18 23:40:00 +00003921 if (retained < oldrefTreeN) {
3922
3923 /* This is the normal (expected) case. We discard any ref whose
3924 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00003925 VG_(initIterSWA)( oldrefTree );
3926 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003927 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003928 tl_assert(oldref->magic == OldRef_MAGIC);
3929 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00003930 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003931 }
sewardjf98e1c02008-10-25 16:22:41 +00003932 }
sewardj9b1f0fd2008-11-18 23:40:00 +00003933 if (VG_(clo_verbosity) > 1) {
3934 VG_(message)(Vg_DebugMsg,
3935 "libhb: EvM GC: delete generations %lu and below, "
sewardj24118492009-07-15 14:50:02 +00003936 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00003937 maxGen, retained );
3938 }
3939
3940 } else {
3941
3942 static UInt rand_seed = 0; /* leave as static */
3943
3944 /* Degenerate case: there's only one generation in the entire
3945 tree, so we need to have some other way of deciding which
3946 refs to throw away. Just throw out half of them randomly. */
3947 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00003948 VG_(initIterSWA)( oldrefTree );
3949 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003950 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00003951 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003952 tl_assert(oldref->magic == OldRef_MAGIC);
3953 n = VG_(random)( &rand_seed );
3954 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00003955 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003956 retained--;
3957 }
3958 }
3959 if (VG_(clo_verbosity) > 1) {
3960 VG_(message)(Vg_DebugMsg,
3961 "libhb: EvM GC: randomly delete half the entries, "
sewardj24118492009-07-15 14:50:02 +00003962 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00003963 retained );
3964 }
3965
sewardjf98e1c02008-10-25 16:22:41 +00003966 }
3967
3968 n2del = VG_(sizeXA)( refs2del );
3969 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3970
3971 if (0) VG_(printf)("%s","deleting entries\n");
3972 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00003973 Bool b;
3974 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00003975 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00003976 tl_assert(b);
3977 tl_assert(keyW == ga2del);
3978 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003979 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjc5ea9962008-12-07 01:41:46 +00003980 Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
3981 RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
3982 if (aRef) {
3983 tl_assert(aThr);
sewardjf98e1c02008-10-25 16:22:41 +00003984 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00003985 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00003986 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003987 tl_assert(!aThr);
sewardjf98e1c02008-10-25 16:22:41 +00003988 }
3989 }
sewardjd86e3a22008-12-03 11:39:37 +00003990
3991 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00003992 }
3993
3994 VG_(deleteXA)( refs2del );
3995
sewardjc5ea9962008-12-07 01:41:46 +00003996 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00003997
3998 oldrefTreeN = retained;
3999 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4000
4001 /* Throw away all RCECs with zero reference counts */
4002 for (i = 0; i < N_RCEC_TAB; i++) {
4003 RCEC** pp = &contextTab[i];
4004 RCEC* p = *pp;
4005 while (p) {
4006 if (p->rc == 0) {
4007 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004008 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004009 p = *pp;
4010 tl_assert(stats__ctxt_tab_curr > 0);
4011 stats__ctxt_tab_curr--;
4012 } else {
4013 pp = &p->next;
4014 p = p->next;
4015 }
4016 }
4017 }
4018
sewardj8f5374e2008-12-07 11:40:17 +00004019 /* Check the reference counts (expensive) */
4020 if (CHECK_CEM)
4021 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004022
4023 //if (0)
4024 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4025 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4026
4027}
4028
4029
4030/////////////////////////////////////////////////////////
4031// //
4032// Core MSM //
4033// //
4034/////////////////////////////////////////////////////////
4035
sewardj23f12002009-07-24 08:45:08 +00004036/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4037 Nov 08, and again after [...],
4038 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004039
sewardj23f12002009-07-24 08:45:08 +00004040static ULong stats__msmcread = 0;
4041static ULong stats__msmcread_change = 0;
4042static ULong stats__msmcwrite = 0;
4043static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004044
4045__attribute__((noinline))
4046static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004047 Addr acc_addr, SizeT szB, Bool isWrite,
4048 VtsID vtsConstraint, VtsID vtsKlock )
sewardjf98e1c02008-10-25 16:22:41 +00004049{
sewardjc5ea9962008-12-07 01:41:46 +00004050 /* Call here to report a race. We just hand it onwards to
4051 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004052 error is going to be collected, then, at history_level 2, that
4053 queries the conflicting-event map. The alternative would be to
4054 query it right here. But that causes a lot of pointless queries
4055 for errors which will shortly be discarded as duplicates, and
4056 can become a performance overhead; so we defer the query until
4057 we know the error is not a duplicate. */
4058
4059 /* Stacks for the bounds of the (or one of the) conflicting
4060 segment(s). These are only set at history_level 1. */
4061 ExeContext* hist1_seg_start = NULL;
4062 ExeContext* hist1_seg_end = NULL;
4063 Thread* hist1_conf_thr = NULL;
4064
4065 tl_assert(acc_thr);
sewardjc5ea9962008-12-07 01:41:46 +00004066 tl_assert(acc_thr->opaque);
sewardj23f12002009-07-24 08:45:08 +00004067 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4068
4069 if (HG_(clo_history_level) == 1) {
4070 Bool found;
4071 Word firstIx, lastIx;
4072 ULong_n_EC key;
4073
4074 /* At history_level 1, we must round up the relevant stack-pair
4075 for the conflicting segment right now. This is because
4076 deferring it is complex; we can't (easily) put vtsKlock and
4077 vtsConstraint into the XError and wait for later without
4078 getting tied up in difficulties with VtsID reference
4079 counting. So just do it now. */
4080 Thr* confThr;
4081 ULong confTym = 0;
4082 /* Which thread are we in conflict with? There may be more than
4083 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4084 (in fact it's the one with the lowest Thr* value). */
4085 confThr = VtsID__findFirst_notLEQ( vtsConstraint, vtsKlock );
4086 /* This must exist! since if it was NULL then there's no
4087 conflict (semantics of return value of VtsID__findFirst_notLEQ) */
4088 tl_assert(confThr);
4089
4090 /* Get the scalar clock value that the conflicting thread
4091 introduced into the constraint. A careful examination of the
4092 base machine rules shows that this must be the same as the
4093 conflicting thread's scalar clock when it created this
4094 constraint. Hence we know the scalar clock of the
4095 conflicting thread when the conflicting access was made. */
4096 confTym = VtsID__indexAt( vtsConstraint, confThr );
4097
4098 /* Using this scalar clock, index into the conflicting thread's
4099 collection of stack traces made each time its vector clock
4100 (hence its scalar clock) changed. This gives the stack
4101 traces at the start and end of the conflicting segment (well,
4102 as per comment just above, of one of the conflicting
4103 segments, if there are more than one). */
4104 key.ull = confTym;
4105 key.ec = NULL;
4106 /* tl_assert(confThr); -- asserted just above */
4107 tl_assert(confThr->local_Krs_n_stacks);
4108 firstIx = lastIx = 0;
4109 found = VG_(lookupXA_UNSAFE)(
4110 confThr->local_Krs_n_stacks,
4111 &key, &firstIx, &lastIx,
4112 (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong
4113 );
4114 if (0) VG_(printf)("record_race_info %u %u confThr %p "
4115 "confTym %llu found %d (%lu,%lu)\n",
4116 vtsConstraint, vtsKlock,
4117 confThr, confTym, found, firstIx, lastIx);
4118 /* We can't indefinitely collect stack traces at VTS
4119 transitions, since we'd eventually run out of memory. Hence
4120 note_local_Kr_n_stack_for will eventually throw away old
4121 ones, which in turn means we might fail to find index value
4122 confTym in the array. */
4123 if (found) {
4124 ULong_n_EC *pair_start, *pair_end;
4125 pair_start
4126 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Krs_n_stacks, lastIx );
4127 hist1_seg_start = pair_start->ec;
4128 if (lastIx+1 < VG_(sizeXA)( confThr->local_Krs_n_stacks )) {
4129 pair_end
4130 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Krs_n_stacks,
4131 lastIx+1 );
4132 /* from properties of VG_(lookupXA) and the comparison fn used: */
4133 tl_assert(pair_start->ull < pair_end->ull);
4134 hist1_seg_end = pair_end->ec;
4135 } else {
4136 if (confThr->still_alive)
4137 hist1_seg_end = main_get_EC( confThr );
4138 }
4139 // seg_start could be NULL iff this is the first stack in the thread
4140 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4141 //if (seg_end) VG_(pp_ExeContext)(seg_end);
4142 hist1_conf_thr = confThr->opaque;
4143 }
4144 }
4145
sewardjc5ea9962008-12-07 01:41:46 +00004146 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00004147 szB, isWrite,
4148 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00004149}
4150
4151static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00004152 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00004153 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00004154 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4155 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00004156}
4157
4158
4159/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004160static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004161 /* The following are only needed for
4162 creating error reports. */
4163 Thr* acc_thr,
4164 Addr acc_addr, SizeT szB )
4165{
4166 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004167 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004168
4169 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004170 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004171 tl_assert(is_sane_SVal_C(svOld));
4172 }
4173
sewardj1c0ce7a2009-07-01 08:10:49 +00004174 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004175 VtsID tviR = acc_thr->viR;
4176 VtsID tviW = acc_thr->viW;
4177 VtsID rmini = SVal__unC_Rmin(svOld);
4178 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004179 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4180 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004181 /* no race */
4182 /* Note: RWLOCK subtlety: use tviW, not tviR */
4183 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4184 goto out;
4185 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004186 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004187 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4188 tl_assert(leqxx);
4189 // same as in non-race case
4190 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4191 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
4192 rmini, tviR );
sewardjf98e1c02008-10-25 16:22:41 +00004193 goto out;
4194 }
4195 }
4196 if (SVal__isA(svOld)) {
4197 /* reading no-access memory (sigh); leave unchanged */
4198 /* check for no pollution */
4199 tl_assert(svOld == SVal_NOACCESS);
4200 svNew = SVal_NOACCESS;
4201 goto out;
4202 }
sewardj23f12002009-07-24 08:45:08 +00004203 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004204 tl_assert(0);
4205
4206 out:
sewardj8f5374e2008-12-07 11:40:17 +00004207 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004208 tl_assert(is_sane_SVal_C(svNew));
4209 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004210 if (UNLIKELY(svNew != svOld)) {
4211 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004212 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004213 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004214 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004215 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004216 }
4217 }
4218 return svNew;
4219}
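
/* A minimal sketch, kept disabled, of the constraint check at the
   heart of msmcread, using plain fixed-size arrays as vector
   timestamps.  ToyVTS, toy_leq and toy_example are hypothetical
   illustrations and not the real VtsID machinery; toy_leq stands in
   for VtsID__cmpLEQ. */
#if 0
#define TOY_NTHR 2
typedef struct { UWord tym[TOY_NTHR]; } ToyVTS;
static Bool toy_leq ( ToyVTS* a, ToyVTS* b ) {
   /* a <= b iff a's clock is <= b's clock for every thread. */
   UWord i;
   for (i = 0; i < TOY_NTHR; i++)
      if (a->tym[i] > b->tym[i]) return False;
   return True;
}
static void toy_example ( void ) {
   /* Constraint left behind by a write done by thread 0 at its
      time 5. */
   ToyVTS rmini = { {5, 0} };
   /* If thread 1 has since synchronised with thread 0, its
      read-clock dominates rmini: no race (the LIKELY path). */
   ToyVTS tviR_sync = { {5, 3} };
   tl_assert( toy_leq(&rmini, &tviR_sync) );
   /* If it has not synchronised, rmini is not <= tviR, and msmcread
      calls record_race_info. */
   ToyVTS tviR_nosync = { {0, 3} };
   tl_assert( !toy_leq(&rmini, &tviR_nosync) );
}
#endif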
4220
4221
4222/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00004223static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004224 /* The following are only needed for
4225 creating error reports. */
4226 Thr* acc_thr,
4227 Addr acc_addr, SizeT szB )
4228{
4229 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004230 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00004231
4232 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004233 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004234 tl_assert(is_sane_SVal_C(svOld));
4235 }
4236
sewardj1c0ce7a2009-07-01 08:10:49 +00004237 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004238 VtsID tviW = acc_thr->viW;
4239 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004240 Bool leq = VtsID__cmpLEQ(wmini,tviW);
4241 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004242 /* no race */
4243 svNew = SVal__mkC( tviW, tviW );
4244 goto out;
4245 } else {
4246 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00004247 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004248 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4249 tl_assert(leqxx);
4250 // same as in non-race case
4251 // proof: in the non-race case, we have
4252 // rmini <= wmini (invar on constraints)
4253 // tviW <= tviR (invar on thread clocks)
4254 // wmini <= tviW (from run-time check)
4255 // hence from transitivity of <= we have
4256 // rmini <= wmini <= tviW
4257 // and so join(rmini,tviW) == tviW
4258 // and join(wmini,tviW) == tviW
4259 // qed.
4260 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
4261 VtsID__join2(wmini, tviW) );
4262 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
4263 wmini, acc_thr->viW );
sewardjf98e1c02008-10-25 16:22:41 +00004264 goto out;
4265 }
4266 }
4267 if (SVal__isA(svOld)) {
4268 /* writing no-access memory (sigh); leave unchanged */
4269 /* check for no pollution */
4270 tl_assert(svOld == SVal_NOACCESS);
4271 svNew = SVal_NOACCESS;
4272 goto out;
4273 }
sewardj23f12002009-07-24 08:45:08 +00004274 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004275 tl_assert(0);
4276
4277 out:
sewardj8f5374e2008-12-07 11:40:17 +00004278 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004279 tl_assert(is_sane_SVal_C(svNew));
4280 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004281 if (UNLIKELY(svNew != svOld)) {
4282 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004283 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004284 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004285 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004286 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004287 }
4288 }
4289 return svNew;
4290}
4291
4292
4293/////////////////////////////////////////////////////////
4294// //
4295// Apply core MSM to specific memory locations //
4296// //
4297/////////////////////////////////////////////////////////
4298
sewardj23f12002009-07-24 08:45:08 +00004299/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004300
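/* All the sapply functions below follow the same pattern: fetch the
   CacheLine for 'a' and consult cl->descrs[tno], which records the
   granularity (64/32/16/8 bit) at which each 8-SVal tree in the line
   currently holds valid values.  If the tree holds a coarser value
   covering the access, it is first pulled down to the access's
   granularity (pulldown_to_8 and friends); if it holds only finer
   values, the access itself is split ('slowcase') into smaller
   ones. */
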
sewardj23f12002009-07-24 08:45:08 +00004301static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004302 CacheLine* cl;
4303 UWord cloff, tno, toff;
4304 SVal svOld, svNew;
4305 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004306 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004307 cl = get_cacheline(a);
4308 cloff = get_cacheline_offset(a);
4309 tno = get_treeno(a);
4310 toff = get_tree_offset(a); /* == 0 .. 7 */
4311 descr = cl->descrs[tno];
4312 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4313 SVal* tree = &cl->svals[tno << 3];
4314 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004315 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004316 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4317 }
4318 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004319 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004320 if (CHECK_ZSM)
4321 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004322 cl->svals[cloff] = svNew;
4323}
4324
sewardj23f12002009-07-24 08:45:08 +00004325static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004326 CacheLine* cl;
4327 UWord cloff, tno, toff;
4328 SVal svOld, svNew;
4329 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004330 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004331 cl = get_cacheline(a);
4332 cloff = get_cacheline_offset(a);
4333 tno = get_treeno(a);
4334 toff = get_tree_offset(a); /* == 0 .. 7 */
4335 descr = cl->descrs[tno];
4336 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4337 SVal* tree = &cl->svals[tno << 3];
4338 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004339 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004340 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4341 }
4342 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004343 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004344 if (CHECK_ZSM)
4345 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004346 cl->svals[cloff] = svNew;
4347}
4348
sewardj23f12002009-07-24 08:45:08 +00004349/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004350
sewardj23f12002009-07-24 08:45:08 +00004351static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004352 CacheLine* cl;
4353 UWord cloff, tno, toff;
4354 SVal svOld, svNew;
4355 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004356 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004357 if (UNLIKELY(!aligned16(a))) goto slowcase;
4358 cl = get_cacheline(a);
4359 cloff = get_cacheline_offset(a);
4360 tno = get_treeno(a);
4361 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4362 descr = cl->descrs[tno];
4363 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4364 if (valid_value_is_below_me_16(descr, toff)) {
4365 goto slowcase;
4366 } else {
4367 SVal* tree = &cl->svals[tno << 3];
4368 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4369 }
sewardj8f5374e2008-12-07 11:40:17 +00004370 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004371 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4372 }
4373 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004374 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004375 if (CHECK_ZSM)
4376 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004377 cl->svals[cloff] = svNew;
4378 return;
4379 slowcase: /* misaligned, or must go further down the tree */
4380 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004381 zsm_sapply08__msmcread( thr, a + 0 );
4382 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004383}
4384
sewardj23f12002009-07-24 08:45:08 +00004385static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004386 CacheLine* cl;
4387 UWord cloff, tno, toff;
4388 SVal svOld, svNew;
4389 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004390 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004391 if (UNLIKELY(!aligned16(a))) goto slowcase;
4392 cl = get_cacheline(a);
4393 cloff = get_cacheline_offset(a);
4394 tno = get_treeno(a);
4395 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4396 descr = cl->descrs[tno];
4397 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4398 if (valid_value_is_below_me_16(descr, toff)) {
4399 goto slowcase;
4400 } else {
4401 SVal* tree = &cl->svals[tno << 3];
4402 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4403 }
sewardj8f5374e2008-12-07 11:40:17 +00004404 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004405 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4406 }
4407 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004408 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004409 if (CHECK_ZSM)
4410 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004411 cl->svals[cloff] = svNew;
4412 return;
4413 slowcase: /* misaligned, or must go further down the tree */
4414 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004415 zsm_sapply08__msmcwrite( thr, a + 0 );
4416 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004417}
4418
sewardj23f12002009-07-24 08:45:08 +00004419/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004420
sewardj23f12002009-07-24 08:45:08 +00004421static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004422 CacheLine* cl;
4423 UWord cloff, tno, toff;
4424 SVal svOld, svNew;
4425 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004426 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00004427 if (UNLIKELY(!aligned32(a))) goto slowcase;
4428 cl = get_cacheline(a);
4429 cloff = get_cacheline_offset(a);
4430 tno = get_treeno(a);
4431 toff = get_tree_offset(a); /* == 0 or 4 */
4432 descr = cl->descrs[tno];
4433 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4434 if (valid_value_is_above_me_32(descr, toff)) {
4435 SVal* tree = &cl->svals[tno << 3];
4436 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4437 } else {
4438 goto slowcase;
4439 }
sewardj8f5374e2008-12-07 11:40:17 +00004440 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004441 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4442 }
4443 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004444 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004445 if (CHECK_ZSM)
4446 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004447 cl->svals[cloff] = svNew;
4448 return;
4449 slowcase: /* misaligned, or must go further down the tree */
4450 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00004451 zsm_sapply16__msmcread( thr, a + 0 );
4452 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00004453}
4454
sewardj23f12002009-07-24 08:45:08 +00004455static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004456 CacheLine* cl;
4457 UWord cloff, tno, toff;
4458 SVal svOld, svNew;
4459 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004460 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00004461 if (UNLIKELY(!aligned32(a))) goto slowcase;
4462 cl = get_cacheline(a);
4463 cloff = get_cacheline_offset(a);
4464 tno = get_treeno(a);
4465 toff = get_tree_offset(a); /* == 0 or 4 */
4466 descr = cl->descrs[tno];
4467 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4468 if (valid_value_is_above_me_32(descr, toff)) {
4469 SVal* tree = &cl->svals[tno << 3];
4470 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4471 } else {
4472 goto slowcase;
4473 }
sewardj8f5374e2008-12-07 11:40:17 +00004474 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004475 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4476 }
4477 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004478 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004479 if (CHECK_ZSM)
4480 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004481 cl->svals[cloff] = svNew;
4482 return;
4483 slowcase: /* misaligned, or must go further down the tree */
4484 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00004485 zsm_sapply16__msmcwrite( thr, a + 0 );
4486 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00004487}
4488
sewardj23f12002009-07-24 08:45:08 +00004489/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004490
sewardj23f12002009-07-24 08:45:08 +00004491static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004492 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00004493 UWord cloff, tno;
4494 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00004495 SVal svOld, svNew;
4496 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004497 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00004498 if (UNLIKELY(!aligned64(a))) goto slowcase;
4499 cl = get_cacheline(a);
4500 cloff = get_cacheline_offset(a);
4501 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00004502 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00004503 descr = cl->descrs[tno];
4504 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4505 goto slowcase;
4506 }
4507 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004508 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004509 if (CHECK_ZSM)
4510 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004511 cl->svals[cloff] = svNew;
4512 return;
4513 slowcase: /* misaligned, or must go further down the tree */
4514 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00004515 zsm_sapply32__msmcread( thr, a + 0 );
4516 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00004517}
4518
sewardj23f12002009-07-24 08:45:08 +00004519static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004520 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00004521 UWord cloff, tno;
4522 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00004523 SVal svOld, svNew;
4524 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004525 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00004526 if (UNLIKELY(!aligned64(a))) goto slowcase;
4527 cl = get_cacheline(a);
4528 cloff = get_cacheline_offset(a);
4529 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00004530 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00004531 descr = cl->descrs[tno];
4532 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
4533 goto slowcase;
4534 }
4535 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004536 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004537 if (CHECK_ZSM)
4538 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004539 cl->svals[cloff] = svNew;
4540 return;
4541 slowcase: /* misaligned, or must go further down the tree */
4542 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00004543 zsm_sapply32__msmcwrite( thr, a + 0 );
4544 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00004545}
4546
sewardj23f12002009-07-24 08:45:08 +00004547/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004548
4549static
sewardj23f12002009-07-24 08:45:08 +00004550void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00004551 CacheLine* cl;
4552 UWord cloff, tno, toff;
4553 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004554 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004555 cl = get_cacheline(a);
4556 cloff = get_cacheline_offset(a);
4557 tno = get_treeno(a);
4558 toff = get_tree_offset(a); /* == 0 .. 7 */
4559 descr = cl->descrs[tno];
4560 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4561 SVal* tree = &cl->svals[tno << 3];
4562 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004563 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004564 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4565 }
4566 tl_assert(svNew != SVal_INVALID);
4567 cl->svals[cloff] = svNew;
4568}
4569
sewardj23f12002009-07-24 08:45:08 +00004570/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004571
4572static
sewardj23f12002009-07-24 08:45:08 +00004573void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00004574 CacheLine* cl;
4575 UWord cloff, tno, toff;
4576 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004577 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004578 if (UNLIKELY(!aligned16(a))) goto slowcase;
4579 cl = get_cacheline(a);
4580 cloff = get_cacheline_offset(a);
4581 tno = get_treeno(a);
4582 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4583 descr = cl->descrs[tno];
4584 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4585 if (valid_value_is_below_me_16(descr, toff)) {
4586 /* Writing at this level. Need to fix up 'descr'. */
4587 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
4588 /* At this point, the tree does not match cl->descr[tno] any
4589 more. The assignments below will fix it up. */
4590 } else {
4591 /* We can't indiscriminately write on the w16 node as in the
4592 w64 case, as that might make the node inconsistent with
4593 its parent. So first, pull down to this level. */
4594 SVal* tree = &cl->svals[tno << 3];
4595 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004596 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004597 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4598 }
4599 }
4600 tl_assert(svNew != SVal_INVALID);
4601 cl->svals[cloff + 0] = svNew;
4602 cl->svals[cloff + 1] = SVal_INVALID;
4603 return;
4604 slowcase: /* misaligned */
4605 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004606 zsm_swrite08( a + 0, svNew );
4607 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00004608}

/*--------------- ZSM accesses: 32 bit swrite --------------- */

static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more.  The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}

/*--------------- ZSM accesses: 64 bit swrite --------------- */

static
void zsm_swrite64 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   stats__cline_swrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   cl->descrs[tno] = TREE_DESCR_64;
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   cl->svals[cloff + 4] = SVal_INVALID;
   cl->svals[cloff + 5] = SVal_INVALID;
   cl->svals[cloff + 6] = SVal_INVALID;
   cl->svals[cloff + 7] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_64to32splits++;
   zsm_swrite32( a + 0, svNew );
   zsm_swrite32( a + 4, svNew );
}
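
/* Illustrative note (example values only): a misaligned swrite
   degrades gracefully.  Given the 2/4/8-byte alignment predicates
   used above, zsm_swrite64(0x1002, sv) splits into zsm_swrite32 at
   0x1002 and 0x1006, and each of those, being 2- but not 4-aligned,
   splits again into two aligned zsm_swrite16 calls.  The
   stats__cline_*splits counters record exactly these cascades. */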

/*------------- ZSM accesses: 8 bit sread/scopy ------------- */

static
SVal zsm_sread08 ( Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_sread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   }
   return cl->svals[cloff];
}

static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   SVal sv;
   stats__cline_scopy08s++;
   sv = zsm_sread08( src );
   zsm_swrite08( dst, sv );
}


/* Block-copy states (needed for implementing realloc()).  Note this
   doesn't change the filtering arrangements.  The caller of
   zsm_scopy_range needs to attend to that. */

static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (len == 0)
      return;

   /* assert for non-overlappingness */
   tl_assert(src+len <= dst || dst+len <= src);

   /* To be simple, just copy byte by byte.  But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      line containing the first and last address. */
   for (i = 0; i < len; i++) {
      Bool normalise
         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
           || i == 0       /* first in range */
           || i == len-1;  /* last in range */
      zsm_scopy08( src+i, dst+i, normalise );
   }
}
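
/* Illustrative usage: the filtering caveat above is discharged by
   libhb_copy_shadow_state (in the Top Level API section below),
   which pairs the copy with the required filter flush:

      zsm_scopy_range( src, dst, len );
      Filter__clear_range( thr->filter, dst, len );
*/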


/* For setting address ranges to a given value.  Has considerable
   sophistication so as to avoid generating large numbers of pointless
   cache loads/writebacks for large ranges. */

/* Do small ranges in-cache, in the obvious way. */
static
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_swrite32( a, svNew );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_swrite64( a, svNew );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_swrite08( a, svNew );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_swrite64( a, svNew );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_swrite08( a, svNew );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}
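
/* Worked example (illustrative values): zsm_sset_range_SMALL(0x1003,
   13, sv) peels off progressively larger naturally-aligned chunks:

      zsm_swrite08( 0x1003, sv );  // align up to a 2-byte boundary
      zsm_swrite32( 0x1004, sv );  // already 4-aligned, not yet 8
      zsm_swrite64( 0x1008, sv );  // the remaining 8 bytes

   for 1 + 4 + 8 == 13 bytes in total, every access aligned. */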


/* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
   for larger ranges, try to operate directly on the out-of-cache
   representation, rather than dragging lines into the cache,
   overwriting them, and forcing them out.  This turns out to be an
   important performance optimisation.

   Note that this doesn't change the filtering arrangements.  The
   caller of zsm_sset_range needs to attend to that. */

static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr  tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      Addr  before_start  = a;
      Addr  aligned_start = cacheline_ROUNDUP(a);
      Addr  after_start   = cacheline_ROUNDDN(a + len);
      UWord before_len    = aligned_start - before_start;
      UWord aligned_len   = after_start - aligned_start;
      UWord after_len     = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      while (1) {
         Addr  tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word  zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache.  Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}
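
/* Worked example (illustrative, and assuming N_LINE_ARANGE == 64):
   zsm_sset_range(0x1005, 0x200, sv) computes

      before_start = 0x1005   aligned_start = 0x1040
      after_start  = 0x1200   (a + len = 0x1205)
      before_len   = 0x3B     aligned_len = 0x1C0    after_len = 5

   The two ragged ends go through zsm_sset_range_SMALL; the seven
   fully-covered lines in the middle are written either in-cache or
   directly as one-entry LineZ dictionaries, as above. */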


/////////////////////////////////////////////////////////
//                                                     //
//   Front-filtering accesses                          //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
#  define STATS__F_SHOW \
      do { \
         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
            VG_(printf)("filters: ac %lu sk %lu\n", \
                        stats__f_ac, stats__f_sk); \
      } while (0)
#else
#  define STATS__F_SHOW /* */
#endif

void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}
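
/* Note on the pattern above (illustrative summary): the filter is
   purely an optimisation.  Each _f__ wrapper asks
   Filter__ok_to_skip_* whether this thread has already processed an
   access subsuming this one since the filter was last cleared; if
   so, the msm machinery is skipped entirely.  Soundness relies on
   every clock movement (thread resume, create, send, recv) clearing
   the filter, as the Filter__clear calls later in this file show. */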

void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   tl_assert(thr->local_Krs_n_stacks);
   if (VG_(sizeXA)( thr->local_Krs_n_stacks ) == 0)
      note_local_Kr_n_stack_for(thr);
}


/////////////////////////////////////////////////////////
//                                                     //
//   Synchronisation objects                           //
//                                                     //
/////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};

static SO* SO__Alloc ( void ) {
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR   = VtsID_INVALID;
   so->viW   = VtsID_INVALID;
   so->magic = SO_MAGIC;
   return so;
}
static void SO__Dealloc ( SO* so ) {
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   HG_(free)( so );
}
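
/* A minimal usage sketch (illustrative only, hence guarded out).  It
   shows the intended SO lifecycle via the public wrappers defined in
   the Top Level API section below. */
#if 0
static void example_SO_lifecycle ( Thr* sender, Thr* receiver )
{
   SO* so = libhb_so_alloc();
   /* sender publishes its clocks into the SO, e.g. at an unlock */
   libhb_so_send( sender, so, True/*strong_send*/ );
   /* receiver acquires them, e.g. at the corresponding lock */
   libhb_so_recv( receiver, so, True/*strong_recv*/ );
   libhb_so_dealloc( so );
}
#endif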


/////////////////////////////////////////////////////////
//                                                     //
//   Top Level API                                     //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr*  thr;
   VtsID vi;
   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC         = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( SVal__rcinc, SVal__rcdec );

   thr = Thr__new();
   vi  = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state("  root", thr);
   return thr;
}


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kr_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kr_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
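
/* Worked example (illustrative): if the parent T1 currently has
   viR = viW = { T1:3 }, then after libhb_create the child T2 has
   viR = viW = { T1:3, T2:1 } (hence the indexAt(..) == 1 assertions
   above), and the parent moves on to { T1:4 }, so pre-spawn and
   post-spawn accesses remain distinguishable on both sides. */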

/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesF_allocd,
                  stats__secmap_linesF_bytes);
      VG_(printf)(" secmaps: %'10lu iterator steppings\n",
                  stats__secmap_iterator_steppings);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)("   cache: %'14lu Z-fetch, %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)("   cache: %'14lu Z-wback, %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)("   cache: %'14lu invals, %'14lu flushes\n",
                  stats__cache_invals, stats__cache_flushes );
      VG_(printf)("   cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      VG_(printf)("   cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)("   cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)("   cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)("   cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)("   cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)("   cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32splits,
                  stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)("   cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32pulldown,
                  stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
         VG_(printf)("   cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      VG_(printf)("   libhb: %'13llu msmcread  (%'llu changed)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)("   libhb: %'13llu msmcwrite (%'llu changed)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)("   libhb: %'13llu join2 queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)(
         "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)( "   libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      VG_(printf)( "   libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
                   stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
                   stats__ctxt_rcdec2,
                   stats__ctxt_rcdec3 );
      VG_(printf)( "   libhb: ctxt__rcdec: calls %lu, discards %lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
      VG_(printf)( "   libhb: contextTab: %lu slots, %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr );
      VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}

void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   thr->still_alive = False;
   /* XXX free up Filter and local_Krs_n_stacks */
}

/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
   a Seg that points at a VTS is its one-and-only owner, and ditto for
   a SO that points at a VTS. */

SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}

void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}

/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   Filter__clear(thr->filter, "libhb_so_send");
   if (thr->still_alive)
      note_local_Kr_n_stack_for(thr);
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}
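
/* Worked example (illustrative): let the sender have viR = { T:5, U:2 }
   and let the SO already hold viR = { T:1, U:7 }.  A strong send
   overwrites, leaving the SO with { T:5, U:2 }; a weak send joins,
   and VtsID__join2's per-thread maximum gives { T:5, U:7 }.  Either
   way, the sender then ticks its own entry and ends at { T:6, U:2 }. */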

void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* QQQ: experimental: also tick the receiver's own component. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__tick( thr->viR, thr );
      VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* QQQ: experimental: as above, for the write-clock. */
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__tick( thr->viW, thr );
         VtsID__rcinc(thr->viW);
      }

      Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kr_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}
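
/* Worked example (illustrative): receiver R has viR = viW = { R:4 }
   and the SO holds viR = viW = { S:7 }.  A weak receive joins only
   the read-clock, so viR becomes { R:4, S:7 } while viW stays
   { R:4 }; a strong receive joins viW as well.  The experimental QQQ
   ticks additionally advance R's own component, e.g. leaving viR at
   { R:5, S:7 }. */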

Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}

void* libhb_get_Thr_opaque ( Thr* thr ) {
   tl_assert(thr);
   return thr->opaque;
}

void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
   tl_assert(thr);
   thr->opaque = v;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   event_map_maybe_GC();
   /* If there are still freelist entries available, no need for a
      GC. */
   if (vts_tab_freelist != VtsID_INVALID)
      return;
   /* So all the table entries are full, and we're having to expand
      the table.  But did we hit the threshold point yet? */
   if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
      return;
   vts_tab__do_GC( False/*don't show stats*/ );
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                SECTION END main library                     //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/