
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2009 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
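
/* A sketch of the intended usage, not part of the original source:
   the caller keeps its own per-SVal reference counts, driven solely by
   the two callbacks, and trusts them only after a flush.  'bump_rc'
   and 'drop_rc' (and the bookkeeping behind them) are hypothetical
   caller-side code:

      static void bump_rc ( SVal sv ) { ... increment count for sv ... }
      static void drop_rc ( SVal sv ) { ... decrement count for sv ... }

      zsm_init( bump_rc, drop_rc );
      ... zsm_sset_range / zsm_scopy_range calls; either callback
          may fire, with counts drifting above or below the truth ...
      zsm_flush_cache();
      // only now are the observed counts exact, so SVals whose count
      // is zero may safely be treated as unreferenced.
*/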

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
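
/* For example, with N == 8: ROUNDUP(13, 8) == 16, ROUNDUP(16, 8) == 16,
   and ROUNDDN(13, 8) == 8.  The bitmask trick works precisely because
   N is a power of 2, so ~(N-1) clears exactly the low-order bits. */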



/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
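
/* Example: a tree covering 8 bytes that holds a 32-bit value at offset
   0, a 16-bit value at offset 4, and 8-bit values at offsets 6 and 7
   has descriptor
      TREE_DESCR_32_0 | TREE_DESCR_16_2 | TREE_DESCR_8_6 | TREE_DESCR_8_7
   with valid SVals at tree[0], tree[4], tree[6] and tree[7]; all other
   tree[] entries are SVal_INVALID.  See descr_to_validbits below. */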

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */
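
/* Size comparison, assuming 64-bit SVals and N_LINE_BITS == 6: a LineZ
   occupies 4*8 + 64/4 == 48 bytes, versus 64*8 == 512 bytes for the
   uncompressed SVals it stands for -- a little over 10x smaller,
   provided the line contains at most 4 distinct values. */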

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
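
/* With the values above, N_SECMAP_ARANGE == 8192 and N_LINE_ARANGE ==
   64, so each SecMap covers 8KB of address space with 128 LineZs. */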

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
     * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
     * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does, however, require us to detect, and not write back, any
   line with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
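
/* Example: with N_LINE_ARANGE == 64, the tag for any address inside
   the line starting at 0x601040 is 0x601040 itself (which is 64-byte
   aligned), while the bogus initialisation tag 1 fails
   is_valid_scache_tag, since 1 & 63 != 0. */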


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8

static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}
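
/* Example, with N_SECMAP_ARANGE == 8192 (0x2000): for a == 0x4007E5,
   shmem__round_to_SecMap_base gives 0x400000 and
   shmem__get_SecMap_offset gives 0x7E5. */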


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
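
/* The gaKey fields are initialised to 1, which can never be a real
   SecMap base address (those are N_SECMAP_ARANGE-aligned), so this
   tiny move-to-front cache starts out guaranteed-empty. */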

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
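
/* Worked example: write_twobit_array(arr, 5, 3) computes bix == 1 and
   shft == 2, so it sets bits [3:2] of arr[1] to 0b11 -- recording that
   the 6th element of the line uses dict[3].  read_twobit_array(arr, 5)
   then recovers 3. */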

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec is applied to the current
   representation, in recognition of the fact that the contents are
   just about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
   VG_(printf)("SM %p: expand F array from %d to %d\n",
               sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0)  \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  |  +------------- TREE_DESCR_32_1
              |             |  |  |  +---------- TREE_DESCR_16_2
              |             |  |  |  |  +------- TREE_DESCR_64
              |             |  |  |  |  |  +---- TREE_DESCR_16_1
              |             |  |  |  |  |  |  +- TREE_DESCR_32_0
              |             |  |  |  |  |  |  |  +- TREE_DESCR_16_0
              |             |  |  |  |  |  |  |  |
              |             |  |  |  |  |  |  |  |  GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
            /* INVALID - any valid descr produces at least one
               valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
   VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return 0;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
923/* This takes a cacheline where all the data is at the leaves
924 (w8[..]) and builds a correctly normalised tree. */
925static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
926{
927 Word tno, cloff;
928 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
929 SVal* tree = &cl->svals[cloff];
930 cl->descrs[tno] = normalise_tree( tree );
931 }
932 tl_assert(cloff == N_LINE_ARANGE);
sewardj8f5374e2008-12-07 11:40:17 +0000933 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +0000934 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
935 stats__cline_normalises++;
936}
937
938
939typedef struct { UChar count; SVal sval; } CountedSVal;
940
941static
942void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
943 /*OUT*/Word* dstUsedP,
944 Word nDst, CacheLine* src )
945{
946 Word tno, cloff, dstUsed;
947
948 tl_assert(nDst == N_LINE_ARANGE);
949 dstUsed = 0;
950
951 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
952 UShort descr = src->descrs[tno];
953 SVal* tree = &src->svals[cloff];
954
955 /* sequentialise the tree described by (descr,tree). */
956# define PUT(_n,_v) \
957 do { dst[dstUsed ].count = (_n); \
958 dst[dstUsed++].sval = (_v); \
959 } while (0)
960
961 /* byte 0 */
962 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
963 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
964 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
965 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
966 /* byte 1 */
967 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
968 /* byte 2 */
969 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
970 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
971 /* byte 3 */
972 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
973 /* byte 4 */
974 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
975 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
976 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
977 /* byte 5 */
978 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
979 /* byte 6 */
980 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
981 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
982 /* byte 7 */
983 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
984
985# undef PUT
986 /* END sequentialise the tree described by (descr,tree). */
987
988 }
989 tl_assert(cloff == N_LINE_ARANGE);
990 tl_assert(dstUsed <= nDst);
991
992 *dstUsedP = dstUsed;
993}
994
995/* Write the cacheline 'wix' to backing store. Where it ends up
996 is determined by its tag field. */
997static __attribute__((noinline)) void cacheline_wback ( UWord wix )
998{
999 Word i, j, k, m;
1000 Addr tag;
1001 SecMap* sm;
1002 CacheLine* cl;
1003 LineZ* lineZ;
1004 LineF* lineF;
1005 Word zix, fix, csvalsUsed;
1006 CountedSVal csvals[N_LINE_ARANGE];
1007 SVal sv;
1008
1009 if (0)
1010 VG_(printf)("scache wback line %d\n", (Int)wix);
1011
1012 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1013
1014 tag = cache_shmem.tags0[wix];
1015 cl = &cache_shmem.lyns0[wix];
1016
1017 /* The cache line may have been invalidated; if so, ignore it. */
1018 if (!is_valid_scache_tag(tag))
1019 return;
1020
1021 /* Where are we going to put it? */
1022 sm = NULL;
1023 lineZ = NULL;
1024 lineF = NULL;
1025 zix = fix = -1;
1026
1027 /* find the Z line to write in and rcdec it or the associated F
1028 line. */
1029 find_Z_for_writing( &sm, &zix, tag );
1030
1031 tl_assert(sm);
1032 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1033 lineZ = &sm->linesZ[zix];
1034
1035 /* Generate the data to be stored */
sewardj8f5374e2008-12-07 11:40:17 +00001036 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001037 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1038
1039 csvalsUsed = -1;
1040 sequentialise_CacheLine( csvals, &csvalsUsed,
1041 N_LINE_ARANGE, cl );
1042 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1043 if (0) VG_(printf)("%lu ", csvalsUsed);
1044
1045 lineZ->dict[0] = lineZ->dict[1]
1046 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1047
1048 /* i indexes actual shadow values, k is cursor in csvals */
1049 i = 0;
1050 for (k = 0; k < csvalsUsed; k++) {
1051
1052 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001053 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001054 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1055 /* do we already have it? */
1056 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1057 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1058 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1059 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1060 /* no. look for a free slot. */
sewardj8f5374e2008-12-07 11:40:17 +00001061 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001062 tl_assert(sv != SVal_INVALID);
1063 if (lineZ->dict[0]
1064 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1065 if (lineZ->dict[1]
1066 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1067 if (lineZ->dict[2]
1068 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1069 if (lineZ->dict[3]
1070 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1071 break; /* we'll have to use the f rep */
1072 dict_ok:
1073 m = csvals[k].count;
1074 if (m == 8) {
1075 write_twobit_array( lineZ->ix2s, i+0, j );
1076 write_twobit_array( lineZ->ix2s, i+1, j );
1077 write_twobit_array( lineZ->ix2s, i+2, j );
1078 write_twobit_array( lineZ->ix2s, i+3, j );
1079 write_twobit_array( lineZ->ix2s, i+4, j );
1080 write_twobit_array( lineZ->ix2s, i+5, j );
1081 write_twobit_array( lineZ->ix2s, i+6, j );
1082 write_twobit_array( lineZ->ix2s, i+7, j );
1083 i += 8;
1084 }
1085 else if (m == 4) {
1086 write_twobit_array( lineZ->ix2s, i+0, j );
1087 write_twobit_array( lineZ->ix2s, i+1, j );
1088 write_twobit_array( lineZ->ix2s, i+2, j );
1089 write_twobit_array( lineZ->ix2s, i+3, j );
1090 i += 4;
1091 }
1092 else if (m == 1) {
1093 write_twobit_array( lineZ->ix2s, i+0, j );
1094 i += 1;
1095 }
1096 else if (m == 2) {
1097 write_twobit_array( lineZ->ix2s, i+0, j );
1098 write_twobit_array( lineZ->ix2s, i+1, j );
1099 i += 2;
1100 }
1101 else {
1102 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1103 }
1104
1105 }
1106
1107 if (LIKELY(i == N_LINE_ARANGE)) {
1108 /* Construction of the compressed representation was
1109 successful. */
1110 rcinc_LineZ(lineZ);
1111 stats__cache_Z_wbacks++;
1112 } else {
1113 /* Cannot use the compressed(z) representation. Use the full(f)
1114 rep instead. */
1115 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1116 alloc_F_for_writing( sm, &fix );
1117 tl_assert(sm->linesF);
1118 tl_assert(sm->linesF_size > 0);
1119 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1120 lineF = &sm->linesF[fix];
1121 tl_assert(!lineF->inUse);
1122 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1123 lineZ->dict[1] = (SVal)fix;
1124 lineF->inUse = True;
1125 i = 0;
1126 for (k = 0; k < csvalsUsed; k++) {
sewardj8f5374e2008-12-07 11:40:17 +00001127 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001128 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1129 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001130 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001131 tl_assert(sv != SVal_INVALID);
1132 for (m = csvals[k].count; m > 0; m--) {
1133 lineF->w64s[i] = sv;
1134 i++;
1135 }
1136 }
1137 tl_assert(i == N_LINE_ARANGE);
1138 rcinc_LineF(lineF);
1139 stats__cache_F_wbacks++;
1140 }
sewardjf98e1c02008-10-25 16:22:41 +00001141}
1142
1143/* Fetch the cacheline 'wix' from the backing store. The tag
1144 associated with 'wix' is assumed to have already been filled in;
1145 hence that is used to determine where in the backing store to read
1146 from. */
1147static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1148{
1149 Word i;
1150 Addr tag;
1151 CacheLine* cl;
1152 LineZ* lineZ;
1153 LineF* lineF;
1154
1155 if (0)
1156 VG_(printf)("scache fetch line %d\n", (Int)wix);
1157
1158 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1159
1160 tag = cache_shmem.tags0[wix];
1161 cl = &cache_shmem.lyns0[wix];
1162
1163 /* reject nonsense requests */
1164 tl_assert(is_valid_scache_tag(tag));
1165
1166 lineZ = NULL;
1167 lineF = NULL;
1168 find_ZF_for_reading( &lineZ, &lineF, tag );
1169 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1170
1171 /* expand the data into the bottom layer of the tree, then get
1172 cacheline_normalise to build the descriptor array. */
1173 if (lineF) {
1174 tl_assert(lineF->inUse);
1175 for (i = 0; i < N_LINE_ARANGE; i++) {
1176 cl->svals[i] = lineF->w64s[i];
1177 }
1178 stats__cache_F_fetches++;
1179 } else {
1180 for (i = 0; i < N_LINE_ARANGE; i++) {
1181 SVal sv;
1182 UWord ix = read_twobit_array( lineZ->ix2s, i );
1183 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1184 sv = lineZ->dict[ix];
1185 tl_assert(sv != SVal_INVALID);
1186 cl->svals[i] = sv;
1187 }
1188 stats__cache_Z_fetches++;
1189 }
1190 normalise_CacheLine( cl );
1191}
1192
1193static void shmem__invalidate_scache ( void ) {
1194 Word wix;
1195 if (0) VG_(printf)("%s","scache inval\n");
1196 tl_assert(!is_valid_scache_tag(1));
1197 for (wix = 0; wix < N_WAY_NENT; wix++) {
1198 cache_shmem.tags0[wix] = 1/*INVALID*/;
1199 }
1200 stats__cache_invals++;
1201}
1202
1203static void shmem__flush_and_invalidate_scache ( void ) {
1204 Word wix;
1205 Addr tag;
1206 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1207 tl_assert(!is_valid_scache_tag(1));
1208 for (wix = 0; wix < N_WAY_NENT; wix++) {
1209 tag = cache_shmem.tags0[wix];
1210 if (tag == 1/*INVALID*/) {
1211 /* already invalid; nothing to do */
1212 } else {
1213 tl_assert(is_valid_scache_tag(tag));
1214 cacheline_wback( wix );
1215 }
1216 cache_shmem.tags0[wix] = 1/*INVALID*/;
1217 }
1218 stats__cache_flushes++;
1219 stats__cache_invals++;
1220}
1221
1222
1223static inline Bool aligned16 ( Addr a ) {
1224 return 0 == (a & 1);
1225}
1226static inline Bool aligned32 ( Addr a ) {
1227 return 0 == (a & 3);
1228}
1229static inline Bool aligned64 ( Addr a ) {
1230 return 0 == (a & 7);
1231}
1232static inline UWord get_cacheline_offset ( Addr a ) {
1233 return (UWord)(a & (N_LINE_ARANGE - 1));
1234}
1235static inline Addr cacheline_ROUNDUP ( Addr a ) {
1236 return ROUNDUP(a, N_LINE_ARANGE);
1237}
1238static inline Addr cacheline_ROUNDDN ( Addr a ) {
1239 return ROUNDDN(a, N_LINE_ARANGE);
1240}
1241static inline UWord get_treeno ( Addr a ) {
1242 return get_cacheline_offset(a) >> 3;
1243}
1244static inline UWord get_tree_offset ( Addr a ) {
1245 return a & 7;
1246}
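
/* Example: for a == 0x1234, get_cacheline_offset gives 0x34 (52), so
   get_treeno gives 6 (the 7th 8-byte tree in the line) and
   get_tree_offset gives 4 (byte 4 within that tree). */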

static __attribute__((noinline))
   CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}
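
/* The lookup is direct-mapped: with N_LINE_BITS == 6 and N_WAY_BITS ==
   16, an address maps to entry (a >> 6) & 0xFFFF and a hit needs only
   the single tag comparison above.  (The "0" suffix in lyns0/tags0
   suggests room was left for more ways, but only one is present.) */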

static __attribute__((noinline))
   CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr       tag = a & ~(N_LINE_ARANGE - 1);
   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
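
/* Example cascade: suppose the tree is a single 64-bit value (descr ==
   TREE_DESCR_64) and byte 3 is needed.  pulldown_to_8(tree, 3, descr)
   sees TREE_DESCR_16_1 missing and calls pulldown_to_16(tree, 2, ..),
   which in turn sees TREE_DESCR_32_0 missing and calls
   pulldown_to_32(tree, 0, ..).  The value is progressively copied down
   until tree[3] holds its own 8-bit-granularity copy. */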


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_VTS_H
#define __HB_VTS_H

/* VtsIDs can't exceed 30 bits, since they have to be packed into the
   lowest 30 bits of an SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID   id;
      XArray* ts; /* XArray* ScalarTS(abstract) */
   }
   VTS;


/* Create a new, empty VTS. */
static VTS* VTS__new ( void );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS. */
static VTS* VTS__singleton ( Thr* thr, ULong tym );

/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified. */
static VTS* VTS__tick ( Thr* me, VTS* vts );

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified. */
static VTS* VTS__join ( VTS* a, VTS* b );
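
/* For instance (threads written as T1, T2, ...):
   join( [T1:3, T2:5], [T1:4, T3:1] ) == [T1:4, T2:5, T3:1],
   taking the per-thread maximum and treating absent entries as 0. */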

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns NULL iff LEQ(A,B), or non-NULL if not.  In the latter case,
   the returned Thr* indicates a discovered point at which LEQ fails.
   There may be more than one such point, but we only care about
   seeing one of them, not all of them.  This rather strange
   convention is used because sometimes we want to know the actual
   index at which they first differ. */
static Thr* VTS__cmpLEQ ( VTS* a, VTS* b );
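
/* e.g. cmpLEQ( [T1:2], [T1:3] ) == NULL, since 2 <= 3 pointwise,
   whereas cmpLEQ( [T1:4, T2:1], [T1:3, T2:9] ) returns T1, a thread
   whose timestamp in the first argument exceeds that in the second. */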

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

#endif /* ! __HB_VTS_H */


/*--------------- to do with Vector Timestamps ---------------*/

/* Scalar Timestamp */
typedef
   struct {
      Thr*  thr;
      ULong tym;
   }
   ScalarTS;


static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (!vts->ts) return False;
   n = VG_(sizeXA)( vts->ts );
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = VG_(indexXA)( vts->ts, i );
         st2 = VG_(indexXA)( vts->ts, i+1 );
         if (st1->thr >= st2->thr)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
VTS* VTS__new ( void )
{
   VTS* vts;
   vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
   tl_assert(vts);
   vts->id = VtsID_INVALID;
   vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
                         HG_(free), sizeof(ScalarTS) );
   tl_assert(vts->ts);
   return vts;
}


/* Delete this VTS in its entirety.
*/
void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->ts);
   VG_(deleteXA)( vts->ts );
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
VTS* VTS__singleton ( Thr* thr, ULong tym ) {
   ScalarTS st;
   VTS*     vts;
   tl_assert(thr);
   tl_assert(tym >= 1);
   vts = VTS__new();
   st.thr = thr;
   st.tym = tym;
   VG_(addToXA)( vts->ts, &st );
   return vts;
}


/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
VTS* VTS__tick ( Thr* me, VTS* vts )
{
   ScalarTS* here = NULL;
   ScalarTS  tmp;
   VTS*      res;
   Word      i, n;
   tl_assert(me);
   tl_assert(is_sane_VTS(vts));
   //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
   //                   (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
   res = VTS__new();
   n = VG_(sizeXA)( vts->ts );

   /* main loop doesn't handle zero-entry case correctly, so
      special-case it. */
   if (n == 0) {
      tmp.thr = me;
      tmp.tym = 1;
      VG_(addToXA)( res->ts, &tmp );
      tl_assert(is_sane_VTS(res));
      return res;
   }

   for (i = 0; i < n; i++) {
      here = VG_(indexXA)( vts->ts, i );
      if (me < here->thr) {
         /* We just went past 'me', without seeing it. */
         tmp.thr = me;
         tmp.tym = 1;
         VG_(addToXA)( res->ts, &tmp );
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
         i++;
         break;
      }
      else if (me == here->thr) {
         tmp = *here;
         tmp.tym++;
         VG_(addToXA)( res->ts, &tmp );
         i++;
         break;
      }
      else /* me > here->thr */ {
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
      }
   }
   tl_assert(i >= 0 && i <= n);
   if (i == n && here && here->thr < me) {
      tmp.thr = me;
      tmp.tym = 1;
      VG_(addToXA)( res->ts, &tmp );
   } else {
      for (/*keepgoing*/; i < n; i++) {
         here = VG_(indexXA)( vts->ts, i );
         tmp = *here;
         VG_(addToXA)( res->ts, &tmp );
      }
   }
   tl_assert(is_sane_VTS(res));
   //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
   //                   (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
   return res;
}
1721
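/* Worked example of the tick operation (editor's sketch; thread
   names and values are illustrative only, with T1 < T2 as pointers):

      VTS__tick(T2, [T1:5])       gives [T1:5, T2:1]   (T2 was absent)
      VTS__tick(T1, [T1:5, T2:1]) gives [T1:6, T2:1]   (T1 incremented)

   In both cases the input VTS is unchanged and a new one is
   returned. */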

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
VTS* VTS__join ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   Thr*  thr;
   VTS*  res;

   tl_assert(a && a->ts);
   tl_assert(b && b->ts);
   useda = VG_(sizeXA)( a->ts );
   usedb = VG_(sizeXA)( b->ts );

   res = VTS__new();
   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thr, tyma, tymb) drawn
         from a and b in order, where thr is the next Thr*
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         thr  = tmpb->thr;
         tyma = 0;
         tymb = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         thr  = tmpa->thr;
         tyma = tmpa->tym;
         tymb = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-Thr*'d triple */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         if (tmpa->thr < tmpb->thr) {
            /* a has the lowest unconsidered Thr* */
            thr  = tmpa->thr;
            tyma = tmpa->tym;
            tymb = 0;
            ia++;
         } else if (tmpa->thr > tmpb->thr) {
            /* b has the lowest unconsidered Thr* */
            thr  = tmpb->thr;
            tyma = 0;
            tymb = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same Thr* */
            tl_assert(tmpa->thr == tmpb->thr);
            thr  = tmpa->thr; /* == tmpb->thr */
            tyma = tmpa->tym;
            tymb = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         ScalarTS st;
         st.thr = thr;
         st.tym = tymMax;
         VG_(addToXA)( res->ts, &st );
      }

   }

   tl_assert(is_sane_VTS( res ));

   return res;
}

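/* Worked example of the join operation (editor's sketch; thread
   names illustrative, with T1 < T2 < T3 as pointers):

      VTS__join( [T1:4, T2:1], [T1:2, T3:7] ) gives [T1:4, T2:1, T3:7]

   that is, a pointwise maximum, with absent entries treated as
   zeroes and zero maxima never stored in the result. */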

/* Determine if 'a' <= 'b', in the partial ordering.  Returns NULL if
   so, or otherwise a Thr* at which the ordering fails.  This rather
   strange convention is used because sometimes we want to know the
   actual index at which they first differ. */
static Thr* VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   tl_assert(a && a->ts);
   tl_assert(b && b->ts);
   useda = VG_(sizeXA)( a->ts );
   usedb = VG_(sizeXA)( b->ts );

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate pairs (tyma, tymb) drawn
         from a and b in order, where tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      Thr* thr;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         tyma = 0;
         tymb = tmpb->tym;
         thr  = tmpb->thr;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         tyma = tmpa->tym;
         thr  = tmpa->thr;
         tymb = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-Thr*'d triple */
         ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
         ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
         if (tmpa->thr < tmpb->thr) {
            /* a has the lowest unconsidered Thr* */
            tyma = tmpa->tym;
            thr  = tmpa->thr;
            tymb = 0;
            ia++;
         }
         else
         if (tmpa->thr > tmpb->thr) {
            /* b has the lowest unconsidered Thr* */
            tyma = 0;
            tymb = tmpb->tym;
            thr  = tmpb->thr;
            ib++;
         } else {
            /* they both next mention the same Thr* */
            tl_assert(tmpa->thr == tmpb->thr);
            tyma = tmpa->tym;
            thr  = tmpa->thr;
            tymb = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thr);
         return thr;
      }
   }

   return NULL; /* all points are LEQ */
}

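/* Worked example of the LEQ test (editor's sketch):

      VTS__cmpLEQ( [T1:2],       [T1:3, T2:1] ) gives NULL  (LEQ holds)
      VTS__cmpLEQ( [T1:3, T2:1], [T1:2]       ) gives T1    (since 3 > 2)

   The non-NULL result names a witness thread at which the pointwise
   comparison fails. */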

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-)
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which exposes likely
      differences quickly. */
   Word     i, useda, usedb;
   ScalarTS *tmpa, *tmpb;

   tl_assert(a && a->ts);
   tl_assert(b && b->ts);
   useda = VG_(sizeXA)( a->ts );
   usedb = VG_(sizeXA)( b->ts );

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;

   /* Same length vectors, so let's step through them together. */
   tl_assert(useda == usedb);
   for (i = 0; i < useda; i++) {
      tmpa = VG_(indexXA)( a->ts, i );
      tmpb = VG_(indexXA)( b->ts, i );
      if (tmpa->tym < tmpb->tym) return -1;
      if (tmpa->tym > tmpb->tym) return 1;
      if (tmpa->thr < tmpb->thr) return -1;
      if (tmpa->thr > tmpb->thr) return 1;
   }

   /* They're identical. */
   return 0;
}


/* Debugging only.  Display the given VTS in the buffer.
*/
void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
   ScalarTS* st;
   HChar     unit[64];
   Word      i, n;
   Int       avail = nBuf;
   tl_assert(vts && vts->ts);
   tl_assert(nBuf > 16);
   buf[0] = '[';
   buf[1] = 0;
   n = VG_(sizeXA)( vts->ts );
   for (i = 0; i < n; i++) {
      tl_assert(avail >= 40);
      st = VG_(indexXA)( vts->ts, i );
      VG_(memset)(unit, 0, sizeof(unit));
      VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
                         st->thr, st->tym);
      if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
         VG_(strcat)(buf, " ...]");
         buf[nBuf-1] = 0;
         return;
      }
      VG_(strcat)(buf, unit);
      avail -= VG_(strlen)(unit);
   }
   VG_(strcat)(buf, "]");
   buf[nBuf-1] = 0;
}


/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
   UWord i, n;
   tl_assert(vts && vts->ts);
   n = VG_(sizeXA)( vts->ts );
   for (i = 0; i < n; i++) {
      ScalarTS* st = VG_(indexXA)( vts->ts, i );
      if (st->thr == idx)
         return st->tym;
   }
   return 0;
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                 SECTION END vts primitives                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                 SECTION BEGIN main library                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
//                       VTS set                       //
//                                                     //
/////////////////////////////////////////////////////////

static WordFM* /* VTS* void void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
   tl_assert(vts_set);
}

/* Given a newly made VTS, look in vts_set to see if we already have
   an identical one.  If yes, free up this one and return instead a
   pointer to the existing one.  If no, add this one to the set and
   return the same pointer.  Caller differentiates the two cases by
   comparing returned pointer with the supplied one (although that
   does require that the supplied VTS is not already in the set).
*/
static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
{
   UWord keyW, valW;
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      VTS__delete(cand);
      return (VTS*)keyW;
   } else {
      /* not present.  Add and return pointer to same. */
      VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
      return cand;
   }
}

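/* Editor's sketch of how the interning scheme above is typically
   used (guarded out; 'thr' is assumed to be some valid Thr*): */
#if 0
static VTS* intern_usage_example ( Thr* thr )
{
   VTS* cand = VTS__singleton( thr, 1 );
   VTS* kept = vts_set__find_and_dealloc__or_add( cand );
   /* If an identical VTS was already interned, 'cand' has been
      deleted and 'kept' points at the existing copy; otherwise
      kept == cand and the set now owns it.  Either way, 'cand'
      must not be used again directly. */
   return kept;
}
#endif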

/////////////////////////////////////////////////////////
//                                                     //
//                      VTS table                      //
//                                                     //
/////////////////////////////////////////////////////////

static void VtsID__invalidate_caches ( void ); /* fwds */

/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for .freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so .freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
   }
   VtsTE;

/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After GC, the value here is
   set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;

static void vts_tab_init ( void )
{
   vts_tab
      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
                    HG_(free), sizeof(VtsTE) );
   vts_tab_freelist
      = VtsID_INVALID;
   tl_assert(vts_tab);
}

/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   tl_assert(ie->freelink == VtsID_INVALID);
   ie->freelink = vts_tab_freelist;
   vts_tab_freelist = ii;
}

/* Get an entry from the free list.  This will return VtsID_INVALID if
   the free list is empty. */
static VtsID get_from_free_list ( void )
{
   VtsID  ii;
   VtsTE* ie;
   if (vts_tab_freelist == VtsID_INVALID)
      return VtsID_INVALID;
   ii = vts_tab_freelist;
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   vts_tab_freelist = ie->freelink;
   return ii;
}

/* Produce a new VtsID that can be used, either by getting it from
   the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
   VtsID ii;
   VtsTE te;
   ii = get_from_free_list();
   if (ii != VtsID_INVALID)
      return ii;
   te.vts = NULL;
   te.rc = 0;
   te.freelink = VtsID_INVALID;
   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   return ii;
}


/* Indirect callback from lib_zsm. */
static void VtsID__rcinc ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts);       /* else it's not in use */
   tl_assert(ie->rc < ~0UL); /* else we can't continue */
   tl_assert(ie->vts->id == ii);
   ie->rc++;
}

/* Indirect callback from lib_zsm. */
static void VtsID__rcdec ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts);    /* else it's not in use */
   tl_assert(ie->rc > 0); /* else RC snafu */
   tl_assert(ie->vts->id == ii);
   ie->rc--;
}


/* Look up 'cand' in our collection of VTSs.  If present, deallocate
   it and return the VtsID for the pre-existing version.  If not
   present, add it to both vts_tab and vts_set, allocate a fresh VtsID
   for it, and return that. */
static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
{
   VTS* auld;
   tl_assert(cand->id == VtsID_INVALID);
   auld = vts_set__find_and_dealloc__or_add(cand);
   if (auld != cand) {
      /* We already have an Aulde one.  Use that. */
      VtsTE* ie;
      tl_assert(auld->id != VtsID_INVALID);
      ie = VG_(indexXA)( vts_tab, auld->id );
      tl_assert(ie->vts == auld);
      return auld->id;
   } else {
      VtsID  ii = get_new_VtsID();
      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
      ie->vts = cand;
      ie->rc = 0;
      ie->freelink = VtsID_INVALID;
      cand->id = ii;
      return ii;
   }
}


static void show_vts_stats ( HChar* caller )
{
   UWord nSet, nTab, nLive;
   ULong totrc;
   UWord n, i;
   nSet = VG_(sizeFM)( vts_set );
   nTab = VG_(sizeXA)( vts_tab );
   totrc = 0;
   nLive = 0;
   n = VG_(sizeXA)( vts_tab );
   for (i = 0; i < n; i++) {
      VtsTE* ie = VG_(indexXA)( vts_tab, i );
      if (ie->vts) {
         nLive++;
         totrc += (ULong)ie->rc;
      } else {
         tl_assert(ie->rc == 0);
      }
   }
   VG_(printf)("  show_vts_stats %s\n", caller);
   VG_(printf)("    vts_tab size %4lu\n", nTab);
   VG_(printf)("    vts_tab live %4lu\n", nLive);
   VG_(printf)("    vts_set size %4lu\n", nSet);
   VG_(printf)("        total rc %4llu\n", totrc);
}

/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are now no longer in use and can be put on the free
      list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 0;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;

   if (show_stats) {
      show_vts_stats("after GC");
      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   }

   if (VG_(clo_stats)) {
      static UInt ctr = 0;
      tl_assert(nTab > 0);
      VG_(message)(Vg_DebugMsg,
                   "libhb: VTS GC: #%u  old size %lu  live %lu  (%2llu%%)\n",
                   ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   }
}

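/* Editor's note on the GC scheduling arithmetic above: with, say,
   nTab == 1000 and nFreed == 400, nLive == 600 and so the next GC
   fires when vts_tab reaches 1200 entries.  Had more than half died
   (say nLive == 200), the threshold would be clamped back up to
   nTab == 1000, since vts_tab itself never shrinks. */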

/////////////////////////////////////////////////////////
//                                                     //
//                       Vts IDs                       //
//                                                     //
/////////////////////////////////////////////////////////

//////////////////////////
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses  = 0;
static ULong stats__join2_queries  = 0;
static ULong stats__join2_misses   = 0;

static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   return hash % nTab;
}

#define N_CMPLEQ_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];

static void VtsID__invalidate_caches ( void ) {
   Int i;
   for (i = 0; i < N_CMPLEQ_CACHE; i++) {
      cmpLEQ_cache[i].vi1 = VtsID_INVALID;
      cmpLEQ_cache[i].vi2 = VtsID_INVALID;
      cmpLEQ_cache[i].leq = False;
   }
   for (i = 0; i < N_JOIN2_CACHE; i++) {
      join2_cache[i].vi1 = VtsID_INVALID;
      join2_cache[i].vi2 = VtsID_INVALID;
      join2_cache[i].res = VtsID_INVALID;
   }
}
//////////////////////////

//static Bool VtsID__is_valid ( VtsID vi ) {
//   VtsTE* ve;
//   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
//      return False;
//   ve = VG_(indexXA)( vts_tab, vi );
//   if (!ve->vts)
//      return False;
//   tl_assert(ve->vts->id == vi);
//   return True;
//}

static VTS* VtsID__to_VTS ( VtsID vi ) {
   VtsTE* te = VG_(indexXA)( vts_tab, vi );
   tl_assert(te->vts);
   return te->vts;
}

static void VtsID__pp ( VtsID vi ) {
   HChar buf[100];
   VTS* vts = VtsID__to_VTS(vi);
   VTS__show( buf, sizeof(buf)-1, vts );
   buf[sizeof(buf)-1] = 0;
   VG_(printf)("%s", buf);
}

/* compute partial ordering relation of vi1 and vi2. */
__attribute__((noinline))
static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt hash;
   Bool leq;
   VTS  *v1, *v2;
   //if (vi1 == vi2) return True;
   tl_assert(vi1 != vi2);
   ////++
   stats__cmpLEQ_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   if (cmpLEQ_cache[hash].vi1 == vi1
       && cmpLEQ_cache[hash].vi2 == vi2)
      return cmpLEQ_cache[hash].leq;
   stats__cmpLEQ_misses++;
   ////--
   v1 = VtsID__to_VTS(vi1);
   v2 = VtsID__to_VTS(vi2);
   leq = VTS__cmpLEQ( v1, v2 ) == NULL;
   ////++
   cmpLEQ_cache[hash].vi1 = vi1;
   cmpLEQ_cache[hash].vi2 = vi2;
   cmpLEQ_cache[hash].leq = leq;
   ////--
   return leq;
}
static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
}

/* compute binary join */
__attribute__((noinline))
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt  hash;
   VtsID res;
   VTS   *vts1, *vts2, *nyu;
   //if (vi1 == vi2) return vi1;
   tl_assert(vi1 != vi2);
   ////++
   stats__join2_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
   if (join2_cache[hash].vi1 == vi1
       && join2_cache[hash].vi2 == vi2)
      return join2_cache[hash].res;
   stats__join2_misses++;
   ////--
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   nyu  = VTS__join(vts1,vts2);
   res  = vts_tab__find_and_dealloc__or_add(nyu);
   ////++
   join2_cache[hash].vi1 = vi1;
   join2_cache[hash].vi2 = vi2;
   join2_cache[hash].res = res;
   ////--
   return res;
}
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
}

/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
   VTS* nyu = VTS__singleton(thr,tym);
   return vts_tab__find_and_dealloc__or_add(nyu);
}

/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   VTS* nyu = VTS__tick(idx,vts);
   return vts_tab__find_and_dealloc__or_add(nyu);
}

/* index into a VTS (only for assertions) */
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   return VTS__indexAt_SLOW( vts, idx );
}

/* Assuming that !cmpLEQ(vi1, vi2), find the first (or any, really)
   element in vi1 which is pointwise greater-than the corresponding
   element in vi2, and return the Thr* identifying it.  This needs to
   be fairly quick since it is called every time a race is
   detected. */
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
{
   VTS  *vts1, *vts2;
   Thr* diffthr;
   tl_assert(vi1 != vi2);
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   tl_assert(vts1 != vts2);
   diffthr = VTS__cmpLEQ(vts1, vts2);
   tl_assert(diffthr); /* else they are LEQ ! */
   return diffthr;
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Filters                       //
//                                                     //
/////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if a R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

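/* Editor's example of the layout above: a 1-byte access to addr+3
   touches bit pair 7:6 of the relevant UShort, so the read-handler
   mask is 0x2 << (2 * 3) == 0x0080 (R bit only) and the write-handler
   mask is 0x3 << (2 * 3) == 0x00C0 (R and W bits).  These match the
   masks computed in Filter__ok_to_skip_crd08 and
   Filter__ok_to_skip_cwr08 below. */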
/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;

/* Forget everything we know -- clear the filter and let everything
   through.  This needs to be as fast as possible, since it is called
   every time the running thread changes, and every time a thread's
   vector clocks change, which can be quite frequent.  The obvious
   fast way to do this is simply to stuff in tags which we know are
   not going to match anything, since they're not aligned to the start
   of a line. */
static void Filter__clear ( Filter* fi, HChar* who )
{
   UWord i;
   if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
   for (i = 0; i < FI_NUM_LINES; i += 8) {
      fi->tags[i+0] = 1; /* impossible value -- cannot match */
      fi->tags[i+1] = 1;
      fi->tags[i+2] = 1;
      fi->tags[i+3] = 1;
      fi->tags[i+4] = 1;
      fi->tags[i+5] = 1;
      fi->tags[i+6] = 1;
      fi->tags[i+7] = 1;
   }
   tl_assert(i == FI_NUM_LINES);
}

/* Clearing an arbitrary range in the filter.  Unfortunately
   we have to do this due to core-supplied new/die-mem events. */

static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   UShort  mask   = 0x3 << (2 * (a & 7));
   /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   if (LIKELY( fi->tags[lineno] == atag )) {
      /* hit.  clear the bits. */
      UShort u16 = line->u16s[loff];
      line->u16s[loff] = u16 & ~mask; /* clear them */
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   if (LIKELY( fi->tags[lineno] == atag )) {
      line->u16s[loff] = 0;
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
   //VG_(printf)("%lu ", len);
   /* slowly do part preceding 8-alignment */
   while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
   /* vector loop */
   while (len >= 8) {
      Filter__clear_8bytes_aligned( fi, a );
      a += 8;
      len -= 8;
   }
   /* slowly do tail */
   while (UNLIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
}


/* ------ Read handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAAAA;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xA << (2 * (a & 6));
      /* mask is A000, 0A00, 00A0 or 000A */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x2 << (2 * (a & 7));
      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R bit set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/* ------ Write handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFFFF;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xF << (2 * (a & 6));
      /* mask is F000, 0F00, 00F0 or 000F */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x3 << (2 * (a & 7));
      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

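/* Editor's sketch of how the handlers above compose (guarded out;
   assumes 'fi' has just been cleared, so the first lookup of any
   address misses): */
#if 0
static void filter_usage_example ( Filter* fi, Addr a /*8-aligned*/ )
{
   /* First 8-byte read at 'a': miss -- the line is claimed, the R
      bits are set, and False is returned (the full state machine
      check must be done). */
   Bool skip1 = Filter__ok_to_skip_crd64( fi, a );
   /* Repeated read: all R bits are already set, so True is returned
      and msmcread can be skipped for this access. */
   Bool skip2 = Filter__ok_to_skip_crd64( fi, a );
   tl_assert(!skip1 && skip2);
}
#endif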

/////////////////////////////////////////////////////////
//                                                     //
//                       Threads                       //
//                                                     //
/////////////////////////////////////////////////////////

// QQQ move this somewhere else
typedef struct { ULong ull; ExeContext* ec; } ULong_n_EC;

/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500

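/* Editor's note: the figures in the comment above are just
   62500 records * 16 bytes/record = 1,000,000 bytes (~1MB), and
   62500 / 2 == 31250, i.e. the guaranteed 31.25k segments. */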

struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same as, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit. */
   Bool still_alive;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* opaque (to us) data we hold on behalf of the library's user. */
   void* opaque;

   /* The ULongs (scalar Kws) in this array accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};

static Thr* Thr__new ( void ) {
   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
   thr->still_alive = True;
   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   /* We only really need this at history level 1, but unfortunately
      this routine is called before the command line processing is
      done (sigh), so we can't rely on HG_(clo_history_level) at this
      point.  Hence always allocate it.  Bah. */
   thr->local_Kws_n_stacks
      = VG_(newXA)( HG_(zalloc),
                    "libhb.Thr__new.3 (local_Kws_and_stacks)",
                    HG_(free), sizeof(ULong_n_EC) );
   return thr;
}

static void note_local_Kw_n_stack_for ( Thr* thr )
{
   Word       nPresent;
   ULong_n_EC pair;
   tl_assert(thr);

   // We only collect this info at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* This is the scalar Kw for thr. */
   pair.ull = VtsID__indexAt( thr->viW, thr );
   pair.ec  = main_get_EC( thr );
   tl_assert(pair.ec);
   tl_assert(thr->local_Kws_n_stacks);

   /* check that we're not adding duplicates */
   nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );

   /* Throw away old stacks, if necessary.  We can't accumulate stuff
      indefinitely. */
   if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
      VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
      nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
      if (0)
         VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p (!!! gc !!!)\n",
                     thr, pair.ull, pair.ec );
   }

   if (nPresent > 0) {
      ULong_n_EC* prevPair
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
      tl_assert( prevPair->ull <= pair.ull );
   }

   if (nPresent == 0)
      pair.ec = NULL;

   VG_(addToXA)( thr->local_Kws_n_stacks, &pair );

   if (0)
      VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p\n",
                  thr, pair.ull, pair.ec );
   if (0)
      VG_(pp_ExeContext)(pair.ec);
}

static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 )
{
   if (pair1->ull < pair2->ull) return -1;
   if (pair1->ull > pair2->ull) return 1;
   return 0;
}


/////////////////////////////////////////////////////////
//                                                     //
//                    Shadow Values                    //
//                                                     //
/////////////////////////////////////////////////////////

// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
// hb_zsm.h.  We have to do everything else here.

/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

*/
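/* Editor's example of the encoding above: for rmini == 0x1234 and
   wmini == 0x5678, SVal__mkC (below) yields 0x0000123400005678ULL;
   the top two bits are 00, so SVal__isC holds and the two VtsIDs
   unpack again via SVal__unC_Rmin / SVal__unC_Wmin.  SVal_NOACCESS
   instead carries the 10 tag in bits 63:62. */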
#define SVAL_TAGMASK (3ULL << 62)

static inline Bool SVal__isC ( SVal s ) {
   return (0ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   //tl_assert(VtsID__is_valid(rmini));
   //tl_assert(VtsID__is_valid(wmini));
   return (((ULong)rmini) << 32) | ((ULong)wmini);
}
static inline VtsID SVal__unC_Rmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s >> 32);
}
static inline VtsID SVal__unC_Wmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s & 0xFFFFFFFFULL);
}

static inline Bool SVal__isA ( SVal s ) {
   return (2ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkA ( void ) {
   return 2ULL << 62;
}

/* Direct callback from lib_zsm. */
static void SVal__rcinc ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcinc( SVal__unC_Rmin(s) );
      VtsID__rcinc( SVal__unC_Wmin(s) );
   }
}

/* Direct callback from lib_zsm. */
static void SVal__rcdec ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcdec( SVal__unC_Rmin(s) );
      VtsID__rcdec( SVal__unC_Wmin(s) );
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//           A simple group (memory) allocator         //
//                                                     //
/////////////////////////////////////////////////////////

//////////////// BEGIN general group allocator
typedef
   struct {
      UWord   elemSzB;        /* element size */
      UWord   nPerGroup;      /* # elems per group */
      void*   (*alloc)(HChar*, SizeT); /* group allocator */
      HChar*  cc; /* group allocator's cc */
      void    (*free)(void*); /* group allocator's free-er (unused) */
      /* XArray of void* (pointers to groups).  The groups themselves.
         Each element is a pointer to a block of size (elemSzB *
         nPerGroup) bytes. */
      XArray* groups;
      /* next free element.  Is a pointer to an element in one of the
         groups pointed to by .groups. */
      void* nextFree;
   }
   GroupAlloc;

static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
                              UWord  elemSzB,
                              UWord  nPerGroup,
                              void*  (*alloc)(HChar*, SizeT),
                              HChar* cc,
                              void   (*free)(void*) )
{
   tl_assert(0 == (elemSzB % sizeof(UWord)));
   tl_assert(elemSzB >= sizeof(UWord));
   tl_assert(nPerGroup >= 100); /* let's say */
   tl_assert(alloc);
   tl_assert(cc);
   tl_assert(free);
   tl_assert(ga);
   VG_(memset)(ga, 0, sizeof(*ga));
   ga->elemSzB   = elemSzB;
   ga->nPerGroup = nPerGroup;
   ga->groups    = NULL;
   ga->alloc     = alloc;
   ga->cc        = cc;
   ga->free      = free;
   ga->groups    = VG_(newXA)( alloc, cc, free, sizeof(void*) );
   ga->nextFree  = NULL;
   tl_assert(ga->groups);
}

/* The freelist is empty.  Allocate a new group and put all the new
   elements in it onto the freelist. */
__attribute__((noinline))
static void gal_add_new_group ( GroupAlloc* ga )
{
   Word   i;
   UWord* group;
   tl_assert(ga);
   tl_assert(ga->nextFree == NULL);
   group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
   tl_assert(group);
   /* extend the freelist through the new group.  Place the freelist
      pointer in the first word of each element.  That's why the
      element size must be at least one word. */
   for (i = ga->nPerGroup-1; i >= 0; i--) {
      UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
      UWord* elem  = (UWord*)elemC;
      tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
      *elem = (UWord)ga->nextFree;
      ga->nextFree = elem;
   }
   /* and add to our collection of groups */
   VG_(addToXA)( ga->groups, &group );
}

inline static void* gal_Alloc ( GroupAlloc* ga )
{
   UWord* elem;
   if (UNLIKELY(ga->nextFree == NULL)) {
      gal_add_new_group(ga);
   }
   elem = ga->nextFree;
   ga->nextFree = (void*)*elem;
   *elem = 0; /* unnecessary, but just to be on the safe side */
   return elem;
}

inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
{
   tl_assert(n == ga->elemSzB);
   return gal_Alloc( ga );
}

inline static void gal_Free ( GroupAlloc* ga, void* p )
{
   UWord* elem = (UWord*)p;
   *elem = (UWord)ga->nextFree;
   ga->nextFree = elem;
}
//////////////// END general group allocator

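/* Editor's sketch of typical group-allocator usage (guarded out;
   'DemoNode' is a hypothetical client type, not from the original
   code): */
#if 0
typedef struct { UWord w1; UWord w2; } DemoNode;
static GroupAlloc demo_node_group_allocator;
static void demo_group_alloc_usage ( void )
{
   DemoNode* n;
   init_GroupAlloc( &demo_node_group_allocator, sizeof(DemoNode),
                    1000/*nPerGroup*/, HG_(zalloc),
                    "libhb.demo_ga.1", HG_(free) );
   n = gal_Alloc( &demo_node_group_allocator ); /* O(1), no per-element
                                                   heap call */
   gal_Free( &demo_node_group_allocator, n );   /* back onto freelist */
}
#endif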

/////////////////////////////////////////////////////////
//                                                     //
//                 Change-event map2                   //
//                                                     //
/////////////////////////////////////////////////////////

#define EVENT_MAP_GC_DISCARD_FRACTION  0.5

/* This is in two parts:

   1. A hash table of RCECs.  This is a set of reference-counted stack
      traces.  When the reference count of a stack trace becomes zero,
      it is removed from the set and freed up.  The intent is to have
      a set of stack traces which can be referred to from (2), but to
      only represent each one once.  The set is indexed/searched by
      ordering on the stack trace vectors.

   2. A SparseWA of OldRefs.  These store information about each old
      ref that we need to record.  It is indexed by address of the
      location for which the information is recorded.  For LRU
      purposes, each OldRef also contains a generation number,
      indicating when it was most recently accessed.

      The important part of an OldRef is, however, its accs[] array.
      This is an array of N_OLDREF_ACCS which binds (thread, R/W,
      size) triples to RCECs.  This allows us to collect the last
      access-traceback by up to N_OLDREF_ACCS different triples for
      this location.  The accs[] array is an MTF-array.  If a binding
      falls off the end, that's too bad -- we will lose info about
      that triple's access to this location.

      When the SparseWA becomes too big, we can throw away the OldRefs
      whose generation numbers are below some threshold; hence doing
      approximate LRU discarding.  For each discarded OldRef we must
      of course decrement the reference count on all the RCECs it
      refers to, in order that entries from (1) eventually get
      discarded too.

   A major improvement in reliability of this mechanism would be to
   have a dynamically sized OldRef.accs[] array, so no entries ever
   fall off the end.  In investigations (Dec 08) it appears that a
   major cause for the non-availability of conflicting-access traces
   in race reports is caused by the fixed size of this array.  I
   suspect for most OldRefs, only a few entries are used, but for a
   minority of cases there is an overflow, leading to info lossage.
   Investigations also suggest this is very workload and scheduling
   sensitive.  Therefore a dynamic sizing would be better.

   However, dynamic sizing would defeat the use of a GroupAllocator
   for OldRef structures.  And that's important for performance.  So
   it's not straightforward to do.
*/


static UWord stats__ctxt_rcdec1          = 0;
static UWord stats__ctxt_rcdec2          = 0;
static UWord stats__ctxt_rcdec3          = 0;
static UWord stats__ctxt_rcdec_calls     = 0;
static UWord stats__ctxt_rcdec_discards  = 0;
static UWord stats__ctxt_rcdec1_eq       = 0;

static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max  = 0;

static UWord stats__ctxt_tab_qs   = 0;
static UWord stats__ctxt_tab_cmps = 0;


///////////////////////////////////////////////////////
//// Part (1): An OSet of RCECs
///

#define N_FRAMES 8

// (UInt) `echo "Reference Counted Execution Context" | md5sum`
#define RCEC_MAGIC 0xab88abb2UL

//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */

typedef
   struct _RCEC {
      UWord magic;  /* sanity check only */
      struct _RCEC* next;
      UWord rc;
      UWord rcX; /* used for crosschecking */
      UWord frames_hash; /* hash of all the frames */
      UWord frames[N_FRAMES];
   }
   RCEC;

static RCEC** contextTab = NULL; /* hash table of RCEC*s */


/* Gives an arbitrary total order on RCEC .frames fields */
static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
   Word i;
   tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
   tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
   if (ec1->frames_hash < ec2->frames_hash) return -1;
   if (ec1->frames_hash > ec2->frames_hash) return  1;
   for (i = 0; i < N_FRAMES; i++) {
      if (ec1->frames[i] < ec2->frames[i]) return -1;
      if (ec1->frames[i] > ec2->frames[i]) return  1;
   }
   return 0;
}


/* Dec the ref of this RCEC. */
static void ctxt__rcdec ( RCEC* ec )
{
   stats__ctxt_rcdec_calls++;
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   tl_assert(ec->rc > 0);
   ec->rc--;
}

static void ctxt__rcinc ( RCEC* ec )
{
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   ec->rc++;
}


//////////// BEGIN RCEC group allocator
static GroupAlloc rcec_group_allocator;

static RCEC* alloc_RCEC ( void ) {
   return gal_Alloc ( &rcec_group_allocator );
}

static void free_RCEC ( RCEC* rcec ) {
   tl_assert(rcec->magic == RCEC_MAGIC);
   gal_Free( &rcec_group_allocator, rcec );
}
//////////// END RCEC group allocator


/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
   subsequent searches for it cheaper. */
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
{
   RCEC *ec0, *ec1, *ec2;
   if (ec == *headp)
      tl_assert(0); /* already at head of list */
   tl_assert(ec != NULL);
   ec0 = *headp;
   ec1 = NULL;
   ec2 = NULL;
   while (True) {
      if (ec0 == NULL || ec0 == ec) break;
      ec2 = ec1;
      ec1 = ec0;
      ec0 = ec0->next;
   }
   tl_assert(ec0 == ec);
   if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
      RCEC* tmp;
      /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
         predecessor.  Swap ec0 and ec1, that is, move ec0 one step
         closer to the start of the list. */
      tl_assert(ec2->next == ec1);
      tl_assert(ec1->next == ec0);
      tmp = ec0->next;
      ec2->next = ec0;
      ec0->next = ec1;
      ec1->next = tmp;
   }
   else
   if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
      /* it's second in the list. */
      tl_assert(*headp == ec1);
      tl_assert(ec1->next == ec0);
      ec1->next = ec0->next;
      ec0->next = ec1;
      *headp = ec0;
   }
}

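/* Editor's illustration of the step above: given a hash chain
   head -> A -> B -> C -> D and a search that finds C, one call
   yields head -> A -> C -> B -> D.  Repeated lookups of a hot RCEC
   therefore migrate it one step per search towards the head, at
   much lower cost than a full move-to-front. */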
3298
3299/* Find the given RCEC in the tree, and return a pointer to it. Or,
3300 if not present, add the given one to the tree (by making a copy of
3301 it, so the caller can immediately deallocate the original) and
3302 return a pointer to the copy. The caller can safely have 'example'
3303 on its stack, since we will always return a pointer to a copy of
3304 it, not to the original. Note that the inserted node will have .rc
                                        3305   of zero and so the caller must immediately increment it. */
3306__attribute__((noinline))
3307static RCEC* ctxt__find_or_add ( RCEC* example )
3308{
3309 UWord hent;
3310 RCEC* copy;
3311 tl_assert(example && example->magic == RCEC_MAGIC);
3312 tl_assert(example->rc == 0);
3313
3314 /* Search the hash table to see if we already have it. */
3315 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00003316 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00003317 copy = contextTab[hent];
3318 while (1) {
3319 if (!copy) break;
3320 tl_assert(copy->magic == RCEC_MAGIC);
3321 stats__ctxt_tab_cmps++;
3322 if (0 == RCEC__cmp_by_frames(copy, example)) break;
3323 copy = copy->next;
3324 }
3325
3326 if (copy) {
3327 tl_assert(copy != example);
3328 /* optimisation: if it's not at the head of its list, move 1
3329 step fwds, to make future searches cheaper */
3330 if (copy != contextTab[hent]) {
3331 move_RCEC_one_step_forward( &contextTab[hent], copy );
3332 }
3333 } else {
sewardjd86e3a22008-12-03 11:39:37 +00003334 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00003335 tl_assert(copy != example);
3336 *copy = *example;
3337 copy->next = contextTab[hent];
3338 contextTab[hent] = copy;
3339 stats__ctxt_tab_curr++;
3340 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
3341 stats__ctxt_tab_max = stats__ctxt_tab_curr;
3342 }
3343 return copy;
3344}
3345
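/* Rotate the word 'w' left by 'n' bit positions, for 0 < n < bits
   per word. get_RCEC below uses this to mix the stack frame
   addresses into a single hash over the whole frames[] array. */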
3346static inline UWord ROLW ( UWord w, Int n )
3347{
3348 Int bpw = 8 * sizeof(UWord);
3349 w = (w << n) | (w >> (bpw-n));
3350 return w;
3351}
3352
3353__attribute__((noinline))
3354static RCEC* get_RCEC ( Thr* thr )
3355{
3356 UWord hash, i;
3357 RCEC example;
3358 example.magic = RCEC_MAGIC;
3359 example.rc = 0;
3360 example.rcX = 0;
njn6c83d5e2009-05-05 23:46:24 +00003361 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00003362 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00003363 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003364 hash ^= example.frames[i];
3365 hash = ROLW(hash, 19);
3366 }
njn6c83d5e2009-05-05 23:46:24 +00003367 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00003368 return ctxt__find_or_add( &example );
3369}
3370
3371///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00003372//// Part (2):
3373/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00003374///
3375
3376// (UInt) `echo "Old Reference Information" | md5sum`
3377#define OldRef_MAGIC 0x30b1f075UL
3378
sewardjc5ea9962008-12-07 01:41:46 +00003379/* Records an access: a thread and a context. The size
3380 (1,2,4,8) and read-or-writeness are also encoded as
3381 follows: bottom bit of .thr is 1 if write, 0 if read
                                        3382   bottom 2 bits of .rcec encode the size:
3383 00 = 1, 01 = 2, 10 = 4, 11 = 8
3384*/
sewardjf98e1c02008-10-25 16:22:41 +00003385typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
3386
sewardj849b0ed2008-12-21 10:43:10 +00003387#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00003388
3389typedef
3390 struct {
sewardjd86e3a22008-12-03 11:39:37 +00003391 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003392 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00003393 /* or free list when not in use */
sewardjf98e1c02008-10-25 16:22:41 +00003394 /* unused slots in this array have .thr == NULL */
3395 Thr_n_RCEC accs[N_OLDREF_ACCS];
3396 }
3397 OldRef;
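/* Note (summarising event_map_bind below): accs[] is kept in
   approximately most-recently-used order. A re-found entry is
   promoted one slot towards the front; a new (thread, R/W, size)
   triple is inserted at the front after shuffling the others down,
   evicting the last entry and dropping its reference on the
   associated RCEC. */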
3398
sewardjd86e3a22008-12-03 11:39:37 +00003399
3400//////////// BEGIN OldRef group allocator
3401static GroupAlloc oldref_group_allocator;
3402
3403static OldRef* alloc_OldRef ( void ) {
3404 return gal_Alloc ( &oldref_group_allocator );
3405}
3406
3407static void free_OldRef ( OldRef* r ) {
3408 tl_assert(r->magic == OldRef_MAGIC);
3409 gal_Free( &oldref_group_allocator, r );
3410}
3411//////////// END OldRef group allocator
3412
sewardjd86e3a22008-12-03 11:39:37 +00003413
sewardjbc307e52008-12-06 22:10:54 +00003414static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
3415static UWord oldrefGen = 0; /* current LRU generation # */
3416static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
3417static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00003418
sewardjc5ea9962008-12-07 01:41:46 +00003419inline static void* ptr_or_UWord ( void* p, UWord w ) {
3420 return (void*)( ((UWord)p) | ((UWord)w) );
3421}
3422inline static void* ptr_and_UWord ( void* p, UWord w ) {
3423 return (void*)( ((UWord)p) & ((UWord)w) );
3424}
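/* Illustrative only: a minimal round-trip sketch of the low-bit
   tagging used below, assuming (as the code does) that Thr* and
   RCEC* values are at least 4-aligned, so their bottom two bits are
   free. 'tag_demo' is hypothetical and not part of libhb; the real
   encode/decode sites are event_map_bind and
   libhb_event_map_lookup. */
#if 0
static void tag_demo ( Thr* thr, RCEC* rcec, Bool isW, SizeT szB )
{
   Thr* thrT; RCEC* rcecT; UWord szCode;
   tl_assert(szB == 1 || szB == 2 || szB == 4 || szB == 8);
   /* encode: write-bit into the Thr pointer, size code
      (00=1, 01=2, 10=4, 11=8) into the RCEC pointer */
   thrT   = ptr_or_UWord( thr, isW ? 1 : 0 );
   szCode = szB == 1 ? 0 : szB == 2 ? 1 : szB == 4 ? 2 : 3;
   rcecT  = ptr_or_UWord( rcec, szCode );
   /* decode: mask the tag bits off again and check the round trip */
   tl_assert( ptr_and_UWord(thrT, ~3) == (void*)thr );
   tl_assert( (1 == (UWord)ptr_and_UWord(thrT, 1)) == (isW ? 1 : 0) );
   tl_assert( ((UWord)1 << (UWord)ptr_and_UWord(rcecT, 3)) == szB );
}
#endif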
3425
sewardj1669cc72008-12-13 01:20:21 +00003426inline static UInt min_UInt ( UInt a, UInt b ) {
3427 return a < b ? a : b;
3428}
3429
sewardja781be62008-12-08 00:12:28 +00003430/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
3431 first interval is lower, 1 if the first interval is higher, and 0
3432 if there is any overlap. Redundant paranoia with casting is there
3433 following what looked distinctly like a bug in gcc-4.1.2, in which
3434 some of the comparisons were done signedly instead of
3435 unsignedly. */
3436/* Copied from exp-ptrcheck/sg_main.c */
3437static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
3438 Addr a2, SizeT n2 ) {
3439 UWord a1w = (UWord)a1;
3440 UWord n1w = (UWord)n1;
3441 UWord a2w = (UWord)a2;
3442 UWord n2w = (UWord)n2;
3443 tl_assert(n1w > 0 && n2w > 0);
3444 if (a1w + n1w <= a2w) return -1L;
3445 if (a2w + n2w <= a1w) return 1L;
3446 return 0;
3447}
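/* Example: [0x1000,0x1004) vs [0x1002,0x1003) overlap, giving 0;
   [0x1000,0x1004) vs [0x1004,0x1008) are disjoint with the first
   interval lower, giving -1. */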
3448
sewardjc5ea9962008-12-07 01:41:46 +00003449static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00003450{
sewardjd86e3a22008-12-03 11:39:37 +00003451 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00003452 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00003453 Word i, j;
3454 UWord keyW, valW;
3455 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003456
sewardjc5ea9962008-12-07 01:41:46 +00003457 rcec = get_RCEC( thr );
3458 ctxt__rcinc(rcec);
3459
                                        3460   /* encode the writeness of the transaction in the bottom bit of
                                        3461      thr, and its size in the bottom two bits of rcec. */
3462 thr = ptr_or_UWord(thr, isW ? 1 : 0);
3463 switch (szB) {
3464 /* This doesn't look particularly branch-predictor friendly. */
3465 case 1: rcec = ptr_or_UWord(rcec, 0); break;
3466 case 2: rcec = ptr_or_UWord(rcec, 1); break;
3467 case 4: rcec = ptr_or_UWord(rcec, 2); break;
3468 case 8: rcec = ptr_or_UWord(rcec, 3); break;
3469 default: tl_assert(0);
3470 }
3471
3472 /* Look in the map to see if we already have this. */
sewardjbc307e52008-12-06 22:10:54 +00003473 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00003474
sewardjd86e3a22008-12-03 11:39:37 +00003475 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00003476
3477 /* We already have a record for this address. We now need to
sewardj849b0ed2008-12-21 10:43:10 +00003478 see if we have a stack trace pertaining to this (thread, R/W,
3479 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00003480 tl_assert(keyW == a);
3481 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003482 tl_assert(ref->magic == OldRef_MAGIC);
3483
3484 tl_assert(thr);
3485 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardj849b0ed2008-12-21 10:43:10 +00003486 if (ref->accs[i].thr != thr)
3487 continue;
3488 /* since .thr encodes both the accessing thread and the
3489 read/writeness, we know now that at least those features
3490 of the access match this entry. So we just need to check
3491 the size indication. Do this by inspecting the lowest 2 bits of
3492 .rcec, which contain the encoded size info. */
3493 if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
3494 continue;
3495 /* else we have a match, so stop looking. */
3496 break;
sewardjf98e1c02008-10-25 16:22:41 +00003497 }
3498
3499 if (i < N_OLDREF_ACCS) {
3500 /* thread 'thr' has an entry at index 'i'. Update it. */
3501 if (i > 0) {
3502 Thr_n_RCEC tmp = ref->accs[i-1];
3503 ref->accs[i-1] = ref->accs[i];
3504 ref->accs[i] = tmp;
3505 i--;
3506 }
sewardjc5ea9962008-12-07 01:41:46 +00003507 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00003508 stats__ctxt_rcdec1++;
sewardjc5ea9962008-12-07 01:41:46 +00003509 ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
3510 ref->accs[i].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003511 tl_assert(ref->accs[i].thr == thr);
3512 } else {
sewardj849b0ed2008-12-21 10:43:10 +00003513 /* No entry for this (thread, R/W, size) triple. Shuffle all
3514 of them down one slot, and put the new entry at the start
3515 of the array. */
sewardjf98e1c02008-10-25 16:22:41 +00003516 if (ref->accs[N_OLDREF_ACCS-1].thr) {
3517 /* the last slot is in use. We must dec the rc on the
3518 associated rcec. */
3519 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
3520 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00003521 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
3522 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjc5ea9962008-12-07 01:41:46 +00003523 ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
sewardjf98e1c02008-10-25 16:22:41 +00003524 } else {
3525 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
3526 }
3527 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
3528 ref->accs[j] = ref->accs[j-1];
3529 ref->accs[0].thr = thr;
sewardjc5ea9962008-12-07 01:41:46 +00003530 ref->accs[0].rcec = rcec;
3531 /* thr==NULL is used to signify an empty slot, so we can't
3532 add a NULL thr. */
3533 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003534 }
3535
3536 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003537
3538 } else {
3539
3540 /* We don't have a record for this address. Create a new one. */
3541 if (oldrefTreeN >= oldrefGenIncAt) {
3542 oldrefGen++;
3543 oldrefGenIncAt = oldrefTreeN + 50000;
3544 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3545 oldrefGen, oldrefTreeN );
3546 }
sewardjd86e3a22008-12-03 11:39:37 +00003547
3548 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00003549 ref->magic = OldRef_MAGIC;
3550 ref->gen = oldrefGen;
sewardjc5ea9962008-12-07 01:41:46 +00003551 ref->accs[0].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003552 ref->accs[0].thr = thr;
sewardj849b0ed2008-12-21 10:43:10 +00003553 /* thr==NULL is used to signify an empty slot, so we can't add a
3554 NULL thr. */
3555 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003556 for (j = 1; j < N_OLDREF_ACCS; j++) {
3557 ref->accs[j].thr = NULL;
3558 ref->accs[j].rcec = NULL;
3559 }
sewardjbc307e52008-12-06 22:10:54 +00003560 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00003561 oldrefTreeN++;
3562
3563 }
3564}
3565
3566
sewardjc5ea9962008-12-07 01:41:46 +00003567Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
3568 /*OUT*/Thr** resThr,
3569 /*OUT*/SizeT* resSzB,
3570 /*OUT*/Bool* resIsW,
3571 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00003572{
sewardja781be62008-12-08 00:12:28 +00003573 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00003574 OldRef* ref;
3575 UWord keyW, valW;
3576 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003577
sewardjc5ea9962008-12-07 01:41:46 +00003578 Thr* cand_thr;
3579 RCEC* cand_rcec;
3580 Bool cand_isW;
3581 SizeT cand_szB;
sewardja781be62008-12-08 00:12:28 +00003582 Addr cand_a;
3583
3584 Addr toCheck[15];
3585 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00003586
3587 tl_assert(thr);
3588 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00003589
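   /* Any previously recorded access overlapping [a, a+szB) must
      start somewhere in [a-7, a+szB-1], since recorded accesses are
      at most 8 bytes long; hence at most 7 + 8 = 15 candidates. */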
sewardja781be62008-12-08 00:12:28 +00003590 toCheck[nToCheck++] = a;
3591 for (i = -7; i < (Word)szB; i++) {
3592 if (i != 0)
3593 toCheck[nToCheck++] = a + i;
3594 }
3595 tl_assert(nToCheck <= 15);
3596
3597 /* Now see if we can find a suitable matching event for
3598 any of the addresses in toCheck[0 .. nToCheck-1]. */
3599 for (j = 0; j < nToCheck; j++) {
3600
3601 cand_a = toCheck[j];
3602 // VG_(printf)("test %ld %p\n", j, cand_a);
3603
3604 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
3605 if (!b)
3606 continue;
3607
sewardjd86e3a22008-12-03 11:39:37 +00003608 ref = (OldRef*)valW;
sewardja781be62008-12-08 00:12:28 +00003609 tl_assert(keyW == cand_a);
sewardjf98e1c02008-10-25 16:22:41 +00003610 tl_assert(ref->magic == OldRef_MAGIC);
3611 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3612
sewardjc5ea9962008-12-07 01:41:46 +00003613 cand_thr = NULL;
3614 cand_rcec = NULL;
3615 cand_isW = False;
3616 cand_szB = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003617
sewardjc5ea9962008-12-07 01:41:46 +00003618 for (i = 0; i < N_OLDREF_ACCS; i++) {
3619 Thr_n_RCEC* cand = &ref->accs[i];
3620 cand_thr = ptr_and_UWord(cand->thr, ~3);
3621 cand_rcec = ptr_and_UWord(cand->rcec, ~3);
3622 /* Decode the writeness from the bottom bit of .thr. */
3623 cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
3624 /* Decode the size from the bottom two bits of .rcec. */
3625 switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
3626 case 0: cand_szB = 1; break;
3627 case 1: cand_szB = 2; break;
3628 case 2: cand_szB = 4; break;
3629 case 3: cand_szB = 8; break;
3630 default: tl_assert(0);
3631 }
3632
3633 if (cand_thr == NULL)
3634 /* This slot isn't in use. Ignore it. */
3635 continue;
3636
3637 if (cand_thr == thr)
3638 /* This is an access by the same thread, but we're only
3639 interested in accesses from other threads. Ignore. */
3640 continue;
3641
3642 if ((!cand_isW) && (!isW))
3643 /* We don't want to report a read racing against another
3644 read; that's stupid. So in this case move on. */
3645 continue;
3646
sewardja781be62008-12-08 00:12:28 +00003647 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
3648 /* No overlap with the access we're asking about. Ignore. */
3649 continue;
3650
sewardjc5ea9962008-12-07 01:41:46 +00003651 /* We have a match. Stop searching. */
3652 break;
3653 }
3654
3655 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
3656
sewardja781be62008-12-08 00:12:28 +00003657 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00003658 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00003659 /* return with success */
3660 tl_assert(cand_thr);
3661 tl_assert(cand_rcec);
3662 tl_assert(cand_rcec->magic == RCEC_MAGIC);
3663 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00003664 /* Count how many non-zero frames we have. */
3665 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
3666 for (n = 0; n < maxNFrames; n++) {
3667 if (0 == cand_rcec->frames[n]) break;
3668 }
3669 *resEC = VG_(make_ExeContext_from_StackTrace)(cand_rcec->frames, n);
sewardja781be62008-12-08 00:12:28 +00003670 *resThr = cand_thr;
3671 *resSzB = cand_szB;
3672 *resIsW = cand_isW;
3673 return True;
3674 }
sewardjc5ea9962008-12-07 01:41:46 +00003675
sewardja781be62008-12-08 00:12:28 +00003676 /* consider next address in toCheck[] */
3677 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00003678
sewardja781be62008-12-08 00:12:28 +00003679 /* really didn't find anything. */
3680 return False;
sewardjf98e1c02008-10-25 16:22:41 +00003681}
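/* Illustrative only: a sketch of how a client of the function above
   might use it when constructing a race report. 'example_report' is
   hypothetical and not the real call site (which is reached on the
   Helgrind side, via HG_(record_error_Race)). */
#if 0
static void example_report ( Thr* thr, Addr a, SizeT szB, Bool isW )
{
   ExeContext* confEC  = NULL;
   Thr*        confThr = NULL;
   SizeT       confSzB = 0;
   Bool        confIsW = False;
   if (libhb_event_map_lookup( &confEC, &confThr, &confSzB, &confIsW,
                               thr, a, szB, isW )) {
      /* confEC is the stack of a previously recorded access, of
         size confSzB by confThr, overlapping [a, a+szB) and
         conflicting with this one; attach it to the report. */
   } else {
      /* The old access has been evicted from the conflicting-access
         cache; report without 'previous access' details. */
   }
}
#endif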
3682
3683static void event_map_init ( void )
3684{
3685 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00003686
3687 /* Context (RCEC) group allocator */
3688 init_GroupAlloc ( &rcec_group_allocator,
3689 sizeof(RCEC),
3690 1000 /* RCECs per group */,
3691 HG_(zalloc),
3692 "libhb.event_map_init.1 (RCEC groups)",
3693 HG_(free) );
3694
3695 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00003696 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00003697 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00003698 N_RCEC_TAB * sizeof(RCEC*) );
3699 tl_assert(contextTab);
3700 for (i = 0; i < N_RCEC_TAB; i++)
3701 contextTab[i] = NULL;
3702
sewardjd86e3a22008-12-03 11:39:37 +00003703 /* Oldref group allocator */
3704 init_GroupAlloc ( &oldref_group_allocator,
3705 sizeof(OldRef),
3706 1000 /* OldRefs per group */,
3707 HG_(zalloc),
3708 "libhb.event_map_init.3 (OldRef groups)",
3709 HG_(free) );
3710
sewardjd86e3a22008-12-03 11:39:37 +00003711 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00003712 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00003713 oldrefTree = VG_(newSWA)(
3714 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00003715 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00003716 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00003717 );
3718 tl_assert(oldrefTree);
3719
3720 oldrefGen = 0;
3721 oldrefGenIncAt = 0;
3722 oldrefTreeN = 0;
3723}
3724
3725static void event_map__check_reference_counts ( Bool before )
3726{
3727 RCEC* rcec;
3728 OldRef* oldref;
3729 Word i;
3730 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00003731 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00003732
3733 /* Set the 'check' reference counts to zero. Also, optionally
3734 check that the real reference counts are non-zero. We allow
3735 these to fall to zero before a GC, but the GC must get rid of
3736 all those that are zero, hence none should be zero after a
3737 GC. */
3738 for (i = 0; i < N_RCEC_TAB; i++) {
3739 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3740 nEnts++;
3741 tl_assert(rcec);
3742 tl_assert(rcec->magic == RCEC_MAGIC);
3743 if (!before)
3744 tl_assert(rcec->rc > 0);
3745 rcec->rcX = 0;
3746 }
3747 }
3748
3749 /* check that the stats are sane */
3750 tl_assert(nEnts == stats__ctxt_tab_curr);
3751 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3752
3753 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00003754 VG_(initIterSWA)( oldrefTree );
3755 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003756 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003757 tl_assert(oldref->magic == OldRef_MAGIC);
3758 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjc5ea9962008-12-07 01:41:46 +00003759 Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
3760 RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
3761 if (aThr) {
3762 tl_assert(aRef);
3763 tl_assert(aRef->magic == RCEC_MAGIC);
3764 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00003765 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003766 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00003767 }
3768 }
3769 }
3770
3771 /* compare check ref counts with actual */
3772 for (i = 0; i < N_RCEC_TAB; i++) {
3773 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3774 tl_assert(rcec->rc == rcec->rcX);
3775 }
3776 }
3777}
3778
sewardj8fd92d32008-11-20 23:17:01 +00003779__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003780static void event_map_maybe_GC ( void )
3781{
3782 OldRef* oldref;
3783 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003784 XArray* refs2del;
3785 Word i, j, n2del;
3786
sewardj8fd92d32008-11-20 23:17:01 +00003787 UWord* genMap = NULL;
3788 UWord genMap_min = 0;
3789 UWord genMap_size = 0;
3790
sewardj849b0ed2008-12-21 10:43:10 +00003791 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
sewardjf98e1c02008-10-25 16:22:41 +00003792 return;
3793
3794 if (0)
3795 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3796
sewardj849b0ed2008-12-21 10:43:10 +00003797 /* Check for sane command line params. Limit values must match
3798 those in hg_process_cmd_line_option. */
3799 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
sewardjf585e482009-08-16 22:52:29 +00003800 tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
sewardj849b0ed2008-12-21 10:43:10 +00003801
sewardj8f5374e2008-12-07 11:40:17 +00003802 /* Check our counting is sane (expensive) */
3803 if (CHECK_CEM)
3804 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00003805
sewardj8f5374e2008-12-07 11:40:17 +00003806 /* Check the reference counts (expensive) */
3807 if (CHECK_CEM)
3808 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003809
sewardj8fd92d32008-11-20 23:17:01 +00003810 /* Compute the distribution of generation values in the ref tree.
3811 There are likely only to be a few different generation numbers
3812 in the whole tree, but we don't know what they are. Hence use a
3813 dynamically resized array of counters. The array is genMap[0
3814 .. genMap_size-1], where genMap[0] is the count for the
3815 generation number genMap_min, genMap[1] is the count for
3816 genMap_min+1, etc. If a new number is seen outside the range
3817 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3818 copied into a larger array, and genMap_min and genMap_size are
3819 adjusted accordingly. */
3820
sewardjf98e1c02008-10-25 16:22:41 +00003821 /* genMap :: generation-number -> count-of-nodes-with-that-number */
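   /* Worked example (invented numbers): if the generations seen
      while iterating are 7, then 9, then 5, the array evolves as:
        key 7: case 1: genMap_min = 7, counts for [7..7] are [1]
        key 9: case 3: grow upwards, counts for [7..9] are [1,0,1]
        key 5: case 2: grow downwards, counts for [5..9] are
               [1,0,1,0,1] */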
sewardjf98e1c02008-10-25 16:22:41 +00003822
sewardjbc307e52008-12-06 22:10:54 +00003823 VG_(initIterSWA)( oldrefTree );
3824 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00003825
sewardjd86e3a22008-12-03 11:39:37 +00003826 UWord ea, key;
3827 oldref = (OldRef*)valW;
3828 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003829
3830 /* BEGIN find 'ea', which is the index in genMap holding the
3831 count for generation number 'key'. */
3832 if (UNLIKELY(genMap == NULL)) {
3833 /* deal with the first key to be seen, so that the following
3834 cases don't need to handle the complexity of a NULL count
3835 array. */
3836 genMap_min = key;
3837 genMap_size = 1;
3838 genMap = HG_(zalloc)( "libhb.emmG.1a",
3839 genMap_size * sizeof(UWord) );
3840 ea = 0;
3841 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
                                        3842                          key, genMap_min, genMap_min + genMap_size - 1 );
sewardjf98e1c02008-10-25 16:22:41 +00003843 }
sewardj8fd92d32008-11-20 23:17:01 +00003844 else
3845 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3846 /* this is the expected (almost-always-happens) case: 'key'
3847 is already mapped in the array. */
3848 ea = key - genMap_min;
3849 }
3850 else
3851 if (key < genMap_min) {
3852 /* 'key' appears before the start of the current array.
3853 Extend the current array by allocating a larger one and
3854 copying the current one to the upper end of it. */
3855 Word more;
3856 UWord* map2;
3857 more = genMap_min - key;
3858 tl_assert(more > 0);
3859 map2 = HG_(zalloc)( "libhb.emmG.1b",
3860 (genMap_size + more) * sizeof(UWord) );
3861 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3862 HG_(free)( genMap );
3863 genMap = map2;
3864 genMap_size += more;
3865 genMap_min -= more;
3866 ea = 0;
3867 tl_assert(genMap_min == key);
3868 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
                                        3869                          key, genMap_min, genMap_min + genMap_size - 1 );
3870 }
3871 else {
3872 /* 'key' appears after the end of the current array. Extend
3873 the current array by allocating a larger one and copying
3874 the current one to the lower end of it. */
3875 Word more;
3876 UWord* map2;
3877 tl_assert(key >= genMap_min + genMap_size);
3878 more = key - (genMap_min + genMap_size) + 1;
3879 tl_assert(more > 0);
3880 map2 = HG_(zalloc)( "libhb.emmG.1c",
3881 (genMap_size + more) * sizeof(UWord) );
3882 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3883 HG_(free)( genMap );
3884 genMap = map2;
3885 genMap_size += more;
                                        3886      ea = genMap_size - 1;
3887 tl_assert(genMap_min + genMap_size - 1 == key);
3888 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
                                        3889                          key, genMap_min, genMap_min + genMap_size - 1 );
3890 }
3891 /* END find 'ea' from 'key' */
3892
3893 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00003894 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00003895 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003896 }
3897
sewardj8fd92d32008-11-20 23:17:01 +00003898 tl_assert(genMap);
3899 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003900
sewardj8fd92d32008-11-20 23:17:01 +00003901 /* Sanity check what we just computed */
3902 { UWord sum = 0;
3903 for (i = 0; i < genMap_size; i++) {
3904 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3905 i + genMap_min, genMap[i] );
3906 sum += genMap[i];
3907 }
3908 tl_assert(sum == oldrefTreeN);
3909 }
3910
3911 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00003912 retained = oldrefTreeN;
3913 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003914
3915 for (i = 0; i < genMap_size; i++) {
3916 keyW = i + genMap_min;
3917 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003918 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3919 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3920 tl_assert(keyW >= maxGen);
3921 tl_assert(retained >= valW);
3922 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00003923 > (UWord)(HG_(clo_conflict_cache_size)
3924 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00003925 retained -= valW;
3926 maxGen = keyW;
3927 } else {
3928 break;
3929 }
3930 }
sewardjf98e1c02008-10-25 16:22:41 +00003931
sewardj8fd92d32008-11-20 23:17:01 +00003932 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003933
sewardj9b1f0fd2008-11-18 23:40:00 +00003934 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003935
3936 /* Now make up a big list of the oldrefTree entries we want to
3937 delete. We can't simultaneously traverse the tree and delete
3938 stuff from it, so first we need to copy them off somewhere
3939 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003940 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00003941 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00003942
sewardj9b1f0fd2008-11-18 23:40:00 +00003943 if (retained < oldrefTreeN) {
3944
3945 /* This is the normal (expected) case. We discard any ref whose
3946 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00003947 VG_(initIterSWA)( oldrefTree );
3948 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003949 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003950 tl_assert(oldref->magic == OldRef_MAGIC);
3951 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00003952 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003953 }
sewardjf98e1c02008-10-25 16:22:41 +00003954 }
sewardj5e2ac3b2009-08-11 10:39:25 +00003955 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003956 VG_(message)(Vg_DebugMsg,
3957 "libhb: EvM GC: delete generations %lu and below, "
sewardj24118492009-07-15 14:50:02 +00003958 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00003959 maxGen, retained );
3960 }
3961
3962 } else {
3963
3964 static UInt rand_seed = 0; /* leave as static */
3965
3966 /* Degenerate case: there's only one generation in the entire
3967 tree, so we need to have some other way of deciding which
3968 refs to throw away. Just throw out half of them randomly. */
3969 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00003970 VG_(initIterSWA)( oldrefTree );
3971 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003972 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00003973 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003974 tl_assert(oldref->magic == OldRef_MAGIC);
3975 n = VG_(random)( &rand_seed );
3976 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00003977 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003978 retained--;
3979 }
3980 }
sewardj5e2ac3b2009-08-11 10:39:25 +00003981 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003982 VG_(message)(Vg_DebugMsg,
3983 "libhb: EvM GC: randomly delete half the entries, "
sewardj24118492009-07-15 14:50:02 +00003984 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00003985 retained );
3986 }
3987
sewardjf98e1c02008-10-25 16:22:41 +00003988 }
3989
3990 n2del = VG_(sizeXA)( refs2del );
3991 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3992
3993 if (0) VG_(printf)("%s","deleting entries\n");
3994 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00003995 Bool b;
3996 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00003997 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00003998 tl_assert(b);
3999 tl_assert(keyW == ga2del);
4000 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004001 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjc5ea9962008-12-07 01:41:46 +00004002 Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
4003 RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
4004 if (aRef) {
4005 tl_assert(aThr);
sewardjf98e1c02008-10-25 16:22:41 +00004006 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00004007 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00004008 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004009 tl_assert(!aThr);
sewardjf98e1c02008-10-25 16:22:41 +00004010 }
4011 }
sewardjd86e3a22008-12-03 11:39:37 +00004012
4013 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00004014 }
4015
4016 VG_(deleteXA)( refs2del );
4017
sewardjc5ea9962008-12-07 01:41:46 +00004018 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00004019
4020 oldrefTreeN = retained;
4021 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4022
4023 /* Throw away all RCECs with zero reference counts */
4024 for (i = 0; i < N_RCEC_TAB; i++) {
4025 RCEC** pp = &contextTab[i];
4026 RCEC* p = *pp;
4027 while (p) {
4028 if (p->rc == 0) {
4029 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004030 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004031 p = *pp;
4032 tl_assert(stats__ctxt_tab_curr > 0);
4033 stats__ctxt_tab_curr--;
4034 } else {
4035 pp = &p->next;
4036 p = p->next;
4037 }
4038 }
4039 }
4040
sewardj8f5374e2008-12-07 11:40:17 +00004041 /* Check the reference counts (expensive) */
4042 if (CHECK_CEM)
4043 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004044
4045 //if (0)
4046 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4047 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4048
4049}
4050
4051
4052/////////////////////////////////////////////////////////
4053// //
4054// Core MSM //
4055// //
4056/////////////////////////////////////////////////////////
4057
sewardj23f12002009-07-24 08:45:08 +00004058/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4059 Nov 08, and again after [...],
4060 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004061
sewardj23f12002009-07-24 08:45:08 +00004062static ULong stats__msmcread = 0;
4063static ULong stats__msmcread_change = 0;
4064static ULong stats__msmcwrite = 0;
4065static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004066
sewardj8ab2c132009-08-02 09:34:35 +00004067/* Some notes on the H1 history mechanism:
4068
4069 Transition rules are:
4070
4071 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4072 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4073
4074 After any access by a thread T to a location L, L's constraint pair
4075 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4076
4077 After a race by thread T conflicting with some previous access by
4078 some other thread U, for a location with constraint (before
4079 processing the later access) (Cr,Cw), then Cw[U] is the segment in
4080 which the previously access lies.
4081
4082 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4083 are compared so as to find out which thread(s) this access
4084 conflicts with. Once that is established, we also require the
4085 pre-update Cw for the location, so we can index into it for those
4086 threads, to get the scalar clock values for the point at which the
4087 former accesses were made. (In fact we only bother to do any of
4088 this for an arbitrarily chosen one of the conflicting threads, as
4089 that's simpler, it avoids flooding the user with vast amounts of
4090 mostly useless information, and because the program is wrong if it
4091 contains any races at all -- so we don't really need to show all
4092 conflicting access pairs initially, so long as we only show none if
4093 none exist).
4094
4095 ---
4096
4097 That requires the auxiliary proof that
4098
4099 (Cr `join` Kw)[T] == Kw[T]
4100
4101 Why should that be true? Because for any thread T, Kw[T] >= the
4102 scalar clock value for T known by any other thread. In other
4103 words, because T's value for its own scalar clock is at least as up
4104 to date as the value for it known by any other thread (that is true
4105 for both the R- and W- scalar clocks). Hence no other thread will
4106 be able to feed in a value for that element (indirectly via a
4107 constraint) which will exceed Kw[T], and hence the join cannot
4108 cause that particular element to advance.
4109*/
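/* A worked example of the failure case (invented numbers, two
   threads, vector clocks written as (T1,T2) pairs):

   - T1 writes location L with Kw = (5,0). The non-race write rule
     leaves L's constraint pair as (Cr,Cw) = ((5,0),(5,0)).

   - T2 then reads L with Kr = (0,7), having not synchronised with
     T1. The check Cr <= Kr compares (5,0) against (0,7) and fails
     in the T1 component (5 > 0), so msmcread flags a race with
     Cfailed = (5,0) and Kfailed = (0,7).

   - VtsID__findFirst_notLEQ(Cfailed, Kfailed) identifies T1 as the
     conflicting thread, and VtsID__indexAt(Cfailed, T1) = 5 recovers
     T1's scalar clock at the time of the conflicting write, which
     record_race_info uses (at history_level 1) to find the bounds
     of the conflicting segment in T1's stack history. */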
4110
sewardjf98e1c02008-10-25 16:22:41 +00004111__attribute__((noinline))
4112static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004113 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004114 VtsID Cfailed,
4115 VtsID Kfailed,
4116 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004117{
sewardjc5ea9962008-12-07 01:41:46 +00004118 /* Call here to report a race. We just hand it onwards to
4119 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004120 error is going to be collected, then, at history_level 2, that
4121 queries the conflicting-event map. The alternative would be to
4122 query it right here. But that causes a lot of pointless queries
4123 for errors which will shortly be discarded as duplicates, and
4124 can become a performance overhead; so we defer the query until
4125 we know the error is not a duplicate. */
4126
4127 /* Stacks for the bounds of the (or one of the) conflicting
4128 segment(s). These are only set at history_level 1. */
4129 ExeContext* hist1_seg_start = NULL;
4130 ExeContext* hist1_seg_end = NULL;
4131 Thread* hist1_conf_thr = NULL;
4132
4133 tl_assert(acc_thr);
sewardjc5ea9962008-12-07 01:41:46 +00004134 tl_assert(acc_thr->opaque);
sewardj23f12002009-07-24 08:45:08 +00004135 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4136
4137 if (HG_(clo_history_level) == 1) {
4138 Bool found;
4139 Word firstIx, lastIx;
4140 ULong_n_EC key;
4141
4142 /* At history_level 1, we must round up the relevant stack-pair
4143 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00004144 deferring it is complex; we can't (easily) put Kfailed and
4145 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00004146 getting tied up in difficulties with VtsID reference
4147 counting. So just do it now. */
4148 Thr* confThr;
4149 ULong confTym = 0;
4150 /* Which thread are we in conflict with? There may be more than
4151 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4152 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00004153 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00004154 /* This must exist! since if it was NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00004155 conflict (semantics of return value of
4156 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4157 called us, just checked exactly this -- that there was in
4158 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00004159 tl_assert(confThr);
4160
4161 /* Get the scalar clock value that the conflicting thread
4162 introduced into the constraint. A careful examination of the
4163 base machine rules shows that this must be the same as the
4164 conflicting thread's scalar clock when it created this
4165 constraint. Hence we know the scalar clock of the
4166 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00004167 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00004168
4169 /* Using this scalar clock, index into the conflicting thread's
4170 collection of stack traces made each time its vector clock
4171 (hence its scalar clock) changed. This gives the stack
4172 traces at the start and end of the conflicting segment (well,
4173 as per comment just above, of one of the conflicting
4174 segments, if there are more than one). */
4175 key.ull = confTym;
4176 key.ec = NULL;
4177 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00004178 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004179 firstIx = lastIx = 0;
4180 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00004181 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004182 &key, &firstIx, &lastIx,
4183 (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong
4184 );
sewardj8ab2c132009-08-02 09:34:35 +00004185 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00004186 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00004187 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00004188 confThr, confTym, found, firstIx, lastIx);
4189 /* We can't indefinitely collect stack traces at VTS
4190 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00004191 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00004192 ones, which in turn means we might fail to find index value
4193 confTym in the array. */
4194 if (found) {
4195 ULong_n_EC *pair_start, *pair_end;
4196 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00004197 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00004198 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004199 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00004200 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00004201 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004202 lastIx+1 );
4203 /* from properties of VG_(lookupXA) and the comparison fn used: */
4204 tl_assert(pair_start->ull < pair_end->ull);
4205 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004206 /* Could do a bit better here. It may be that pair_end
4207 doesn't have a stack, but the following entries in the
                                        4208            doesn't have a stack, but the following entries in the
                                        4209            array have the same scalar Kw and do have a stack. So
4209 we should search a bit further along the array than
4210 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00004211 } else {
4212 if (confThr->still_alive)
4213 hist1_seg_end = main_get_EC( confThr );
4214 }
4215 // seg_start could be NULL iff this is the first stack in the thread
4216 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4217 //if (seg_end) VG_(pp_ExeContext)(seg_end);
4218 hist1_conf_thr = confThr->opaque;
4219 }
4220 }
4221
sewardjc5ea9962008-12-07 01:41:46 +00004222 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00004223 szB, isWrite,
4224 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00004225}
4226
4227static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00004228 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00004229 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00004230 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4231 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00004232}
4233
4234
4235/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004236static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004237 /* The following are only needed for
4238 creating error reports. */
4239 Thr* acc_thr,
4240 Addr acc_addr, SizeT szB )
4241{
4242 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004243 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004244
4245 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004246 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004247 tl_assert(is_sane_SVal_C(svOld));
4248 }
4249
sewardj1c0ce7a2009-07-01 08:10:49 +00004250 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004251 VtsID tviR = acc_thr->viR;
4252 VtsID tviW = acc_thr->viW;
4253 VtsID rmini = SVal__unC_Rmin(svOld);
4254 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004255 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4256 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004257 /* no race */
4258 /* Note: RWLOCK subtlety: use tviW, not tviR */
4259 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4260 goto out;
4261 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004262 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004263 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4264 tl_assert(leqxx);
4265 // same as in non-race case
4266 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4267 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004268 rmini, /* Cfailed */
4269 tviR, /* Kfailed */
4270 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004271 goto out;
4272 }
4273 }
4274 if (SVal__isA(svOld)) {
4275 /* reading no-access memory (sigh); leave unchanged */
4276 /* check for no pollution */
4277 tl_assert(svOld == SVal_NOACCESS);
4278 svNew = SVal_NOACCESS;
4279 goto out;
4280 }
sewardj23f12002009-07-24 08:45:08 +00004281 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004282 tl_assert(0);
4283
4284 out:
sewardj8f5374e2008-12-07 11:40:17 +00004285 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004286 tl_assert(is_sane_SVal_C(svNew));
4287 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004288 if (UNLIKELY(svNew != svOld)) {
4289 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004290 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004291 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004292 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004293 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004294 }
4295 }
4296 return svNew;
4297}
4298
4299
4300/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00004301static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004302 /* The following are only needed for
4303 creating error reports. */
4304 Thr* acc_thr,
4305 Addr acc_addr, SizeT szB )
4306{
4307 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004308 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00004309
4310 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004311 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004312 tl_assert(is_sane_SVal_C(svOld));
4313 }
4314
sewardj1c0ce7a2009-07-01 08:10:49 +00004315 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004316 VtsID tviW = acc_thr->viW;
4317 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004318 Bool leq = VtsID__cmpLEQ(wmini,tviW);
4319 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004320 /* no race */
4321 svNew = SVal__mkC( tviW, tviW );
4322 goto out;
4323 } else {
4324 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00004325 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004326 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4327 tl_assert(leqxx);
4328 // same as in non-race case
4329 // proof: in the non-race case, we have
4330 // rmini <= wmini (invar on constraints)
4331 // tviW <= tviR (invar on thread clocks)
4332 // wmini <= tviW (from run-time check)
4333 // hence from transitivity of <= we have
4334 // rmini <= wmini <= tviW
4335 // and so join(rmini,tviW) == tviW
4336 // and join(wmini,tviW) == tviW
4337 // qed.
4338 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
4339 VtsID__join2(wmini, tviW) );
4340 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004341 wmini, /* Cfailed */
4342 tviW, /* Kfailed */
4343 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004344 goto out;
4345 }
4346 }
4347 if (SVal__isA(svOld)) {
4348 /* writing no-access memory (sigh); leave unchanged */
4349 /* check for no pollution */
4350 tl_assert(svOld == SVal_NOACCESS);
4351 svNew = SVal_NOACCESS;
4352 goto out;
4353 }
sewardj23f12002009-07-24 08:45:08 +00004354 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004355 tl_assert(0);
4356
4357 out:
sewardj8f5374e2008-12-07 11:40:17 +00004358 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004359 tl_assert(is_sane_SVal_C(svNew));
4360 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004361 if (UNLIKELY(svNew != svOld)) {
4362 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004363 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004364 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004365 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004366 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004367 }
4368 }
4369 return svNew;
4370}
4371
4372
4373/////////////////////////////////////////////////////////
4374// //
4375// Apply core MSM to specific memory locations //
4376// //
4377/////////////////////////////////////////////////////////
4378
sewardj23f12002009-07-24 08:45:08 +00004379/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004380
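/* All the zsm_sapplyNN__msmc{read,write} routines below follow the
   same pattern: find the cache line holding 'a'; if the shadow
   values for the enclosing 8-byte tree are not already represented
   at the granularity of this access, pull the tree down to that
   granularity; then push the old shadow value through msmcread or
   msmcwrite and write the result back. Misaligned accesses, and
   accesses whose valid value lives at a finer granularity, fall
   back to two sub-accesses of half the size. */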
sewardj23f12002009-07-24 08:45:08 +00004381static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004382 CacheLine* cl;
4383 UWord cloff, tno, toff;
4384 SVal svOld, svNew;
4385 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004386 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004387 cl = get_cacheline(a);
4388 cloff = get_cacheline_offset(a);
4389 tno = get_treeno(a);
4390 toff = get_tree_offset(a); /* == 0 .. 7 */
4391 descr = cl->descrs[tno];
4392 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4393 SVal* tree = &cl->svals[tno << 3];
4394 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004395 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004396 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4397 }
4398 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004399 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004400 if (CHECK_ZSM)
4401 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004402 cl->svals[cloff] = svNew;
4403}
4404
sewardj23f12002009-07-24 08:45:08 +00004405static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004406 CacheLine* cl;
4407 UWord cloff, tno, toff;
4408 SVal svOld, svNew;
4409 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004410 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004411 cl = get_cacheline(a);
4412 cloff = get_cacheline_offset(a);
4413 tno = get_treeno(a);
4414 toff = get_tree_offset(a); /* == 0 .. 7 */
4415 descr = cl->descrs[tno];
4416 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4417 SVal* tree = &cl->svals[tno << 3];
4418 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004419 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004420 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4421 }
4422 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004423 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004424 if (CHECK_ZSM)
4425 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004426 cl->svals[cloff] = svNew;
4427}
4428
sewardj23f12002009-07-24 08:45:08 +00004429/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004430
sewardj23f12002009-07-24 08:45:08 +00004431static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004432 CacheLine* cl;
4433 UWord cloff, tno, toff;
4434 SVal svOld, svNew;
4435 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004436 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004437 if (UNLIKELY(!aligned16(a))) goto slowcase;
4438 cl = get_cacheline(a);
4439 cloff = get_cacheline_offset(a);
4440 tno = get_treeno(a);
4441 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4442 descr = cl->descrs[tno];
4443 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4444 if (valid_value_is_below_me_16(descr, toff)) {
4445 goto slowcase;
4446 } else {
4447 SVal* tree = &cl->svals[tno << 3];
4448 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4449 }
sewardj8f5374e2008-12-07 11:40:17 +00004450 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004451 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4452 }
4453 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004454 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004455 if (CHECK_ZSM)
4456 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004457 cl->svals[cloff] = svNew;
4458 return;
4459 slowcase: /* misaligned, or must go further down the tree */
4460 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004461 zsm_sapply08__msmcread( thr, a + 0 );
4462 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004463}
4464
sewardj23f12002009-07-24 08:45:08 +00004465static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004466 CacheLine* cl;
4467 UWord cloff, tno, toff;
4468 SVal svOld, svNew;
4469 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004470 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004471 if (UNLIKELY(!aligned16(a))) goto slowcase;
4472 cl = get_cacheline(a);
4473 cloff = get_cacheline_offset(a);
4474 tno = get_treeno(a);
4475 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4476 descr = cl->descrs[tno];
4477 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4478 if (valid_value_is_below_me_16(descr, toff)) {
4479 goto slowcase;
4480 } else {
4481 SVal* tree = &cl->svals[tno << 3];
4482 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4483 }
sewardj8f5374e2008-12-07 11:40:17 +00004484 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004485 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4486 }
4487 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004488 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004489 if (CHECK_ZSM)
4490 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004491 cl->svals[cloff] = svNew;
4492 return;
4493 slowcase: /* misaligned, or must go further down the tree */
4494 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004495 zsm_sapply08__msmcwrite( thr, a + 0 );
4496 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004497}
4498
sewardj23f12002009-07-24 08:45:08 +00004499/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004500
sewardj23f12002009-07-24 08:45:08 +00004501static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004502 CacheLine* cl;
4503 UWord cloff, tno, toff;
4504 SVal svOld, svNew;
4505 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004506 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00004507 if (UNLIKELY(!aligned32(a))) goto slowcase;
4508 cl = get_cacheline(a);
4509 cloff = get_cacheline_offset(a);
4510 tno = get_treeno(a);
4511 toff = get_tree_offset(a); /* == 0 or 4 */
4512 descr = cl->descrs[tno];
4513 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4514 if (valid_value_is_above_me_32(descr, toff)) {
4515 SVal* tree = &cl->svals[tno << 3];
4516 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4517 } else {
4518 goto slowcase;
4519 }
sewardj8f5374e2008-12-07 11:40:17 +00004520 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004521 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4522 }
4523 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004524 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004525 if (CHECK_ZSM)
4526 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004527 cl->svals[cloff] = svNew;
4528 return;
4529 slowcase: /* misaligned, or must go further down the tree */
4530 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00004531 zsm_sapply16__msmcread( thr, a + 0 );
4532 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00004533}
4534
sewardj23f12002009-07-24 08:45:08 +00004535static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004536 CacheLine* cl;
4537 UWord cloff, tno, toff;
4538 SVal svOld, svNew;
4539 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004540 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00004541 if (UNLIKELY(!aligned32(a))) goto slowcase;
4542 cl = get_cacheline(a);
4543 cloff = get_cacheline_offset(a);
4544 tno = get_treeno(a);
4545 toff = get_tree_offset(a); /* == 0 or 4 */
4546 descr = cl->descrs[tno];
4547 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4548 if (valid_value_is_above_me_32(descr, toff)) {
4549 SVal* tree = &cl->svals[tno << 3];
4550 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4551 } else {
4552 goto slowcase;
4553 }
sewardj8f5374e2008-12-07 11:40:17 +00004554 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004555 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4556 }
4557 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004558 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004559 if (CHECK_ZSM)
4560 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004561 cl->svals[cloff] = svNew;
4562 return;
4563 slowcase: /* misaligned, or must go further down the tree */
4564 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00004565 zsm_sapply16__msmcwrite( thr, a + 0 );
4566 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00004567}

/*------------- ZSM accesses: 64 bit sapply ------------- */

static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno;
   //UWord toff;
   SVal svOld, svNew;
   UShort descr;
   stats__cline_cread64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcread( thr, a + 0 );
   zsm_sapply32__msmcread( thr, a + 4 );
}

static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno;
   //UWord toff;
   SVal svOld, svNew;
   UShort descr;
   stats__cline_cwrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcwrite( thr, a + 0 );
   zsm_sapply32__msmcwrite( thr, a + 4 );
}

/*--------------- ZSM accesses: 8 bit swrite --------------- */

static
void zsm_swrite08 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite08s++;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*--------------- ZSM accesses: 16 bit swrite --------------- */

static
void zsm_swrite16 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more.  The assignments below will fix it up. */
      } else {
         /* We can't indiscriminately write on the w16 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_16to8splits++;
   zsm_swrite08( a + 0, svNew );
   zsm_swrite08( a + 1, svNew );
}

/*--------------- ZSM accesses: 32 bit swrite --------------- */

static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more.  The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}

/*--------------- ZSM accesses: 64 bit swrite --------------- */

static
void zsm_swrite64 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno;
   //UWord toff;
   stats__cline_swrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   cl->descrs[tno] = TREE_DESCR_64;
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   cl->svals[cloff + 4] = SVal_INVALID;
   cl->svals[cloff + 5] = SVal_INVALID;
   cl->svals[cloff + 6] = SVal_INVALID;
   cl->svals[cloff + 7] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_64to32splits++;
   zsm_swrite32( a + 0, svNew );
   zsm_swrite32( a + 4, svNew );
}

/*------------- ZSM accesses: 8 bit sread/scopy ------------- */

static
SVal zsm_sread08 ( Addr a ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_sread08s++;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   }
   return cl->svals[cloff];
}

static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   SVal sv;
   stats__cline_scopy08s++;
   sv = zsm_sread08( src );
   zsm_swrite08( dst, sv );
}


/* Block-copy states (needed for implementing realloc()).  Note this
   doesn't change the filtering arrangements.  The caller of
   zsm_scopy_range needs to attend to that. */

static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (len == 0)
      return;

   /* assert for non-overlappingness */
   tl_assert(src+len <= dst || dst+len <= src);

   /* To be simple, just copy byte by byte.  But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      line containing the first and last address. */
   for (i = 0; i < len; i++) {
      Bool normalise
         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
           || i == 0      /* first in range */
           || i == len-1; /* last in range */
      zsm_scopy08( src+i, dst+i, normalise );
   }
}
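
/* Illustrative sketch, compiled out: how a realloc-style caller might
   use zsm_scopy_range and then attend to the filtering duty mentioned
   above.  The wrapper and its parameter names are hypothetical;
   zsm_scopy_range and Filter__clear_range are the real functions from
   this file. */
#if 0
static void example_copy_for_realloc ( Thr* thr,
                                       Addr oldPayload, Addr newPayload,
                                       SizeT overlapLen )
{
   /* copy the shadow values for the surviving part of the block .. */
   zsm_scopy_range( oldPayload, newPayload, overlapLen );
   /* .. and invalidate the filter over the destination, since the
      shadow state there has just changed behind the filter's back */
   Filter__clear_range( thr->filter, newPayload, overlapLen );
}
#endif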


/* For setting address ranges to a given value.  Has considerable
   sophistication so as to avoid generating large numbers of pointless
   cache loads/writebacks for large ranges. */

/* Do small ranges in-cache, in the obvious way. */
static
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_swrite32( a, svNew );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_swrite64( a, svNew );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_swrite08( a, svNew );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_swrite64( a, svNew );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_swrite08( a, svNew );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}
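
/* Worked example (illustrative only) of the peeling logic above.
   For zsm_sset_range_SMALL( 0x1003, 13, sv ) the calls issued are

      zsm_swrite08( 0x1003, sv );   // a -> 0x1004, len -> 12
      zsm_swrite32( 0x1004, sv );   // a -> 0x1008, len -> 8
      zsm_swrite64( 0x1008, sv );   // a -> 0x1010, len -> 0

   (0x1004 is already 32-bit aligned, so the 16-bit step is skipped.)
   A misaligned 13-byte range therefore costs three shadow writes
   rather than thirteen byte-sized ones. */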


/* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
   for larger ranges, try to operate directly on the out-of-cache
   representation, rather than dragging lines into the cache,
   overwriting them, and forcing them out.  This turns out to be an
   important performance optimisation.

   Note that this doesn't change the filtering arrangements.  The
   caller of zsm_sset_range needs to attend to that. */

static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      Addr before_start = a;
      Addr aligned_start = cacheline_ROUNDUP(a);
      Addr after_start = cacheline_ROUNDDN(a + len);
      UWord before_len = aligned_start - before_start;
      UWord aligned_len = after_start - aligned_start;
      UWord after_len = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      while (1) {
         Addr tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache.  Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}
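
/* Worked example (illustrative only; assumes, for concreteness, that a
   cache line covers 64 bytes, ie N_LINE_ARANGE == 64).  For
   zsm_sset_range( 1000, 290, sv ) the split computed above is

      before_start = 1000   aligned_start = 1024   after_start = 1280
      before_len   = 24     aligned_len   = 256    after_len   = 10

   The 24- and 10-byte fringes go via zsm_sset_range_SMALL, while the
   four fully-covered lines in the middle are either rewritten with
   zsm_swrite64 (if already in cache) or written directly in the Z
   representation, without being dragged into the cache. */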


/////////////////////////////////////////////////////////
//                                                     //
//   Front-filtering accesses                          //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
#  define STATS__F_SHOW \
      do { \
         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
            VG_(printf)("filters: ac %lu sk %lu\n", \
                        stats__f_ac, stats__f_sk); \
      } while (0)
#else
#  define STATS__F_SHOW /* */
#endif

void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(thr->still_alive);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   if (HG_(clo_history_level) == 1) {
      tl_assert(thr->local_Kws_n_stacks);
      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
         note_local_Kw_n_stack_for(thr);
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//   Synchronisation objects                           //
//                                                     //
/////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt magic;
};

static SO* SO__Alloc ( void ) {
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR = VtsID_INVALID;
   so->viW = VtsID_INVALID;
   so->magic = SO_MAGIC;
   return so;
}
static void SO__Dealloc ( SO* so ) {
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   HG_(free)( so );
}


/////////////////////////////////////////////////////////
//                                                     //
//   Top Level API                                     //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr* thr;
   VtsID vi;
   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( SVal__rcinc, SVal__rcdec );

   thr = Thr__new();
   vi = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state(" root", thr);
   return thr;
}


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
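
/* Illustrative sketch of the clock algebra above, writing a VTS as
   { thread: count, ... }.  If parent P enters with viR == viW == { P:3 },
   then after libhb_create:

      child C:  viR == viW == { P:3, C:1 }
      parent P: viR == viW == { P:4 }

   so everything P did before the create happens-before everything the
   child ever does, as required. */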

/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesF_allocd,
                  stats__secmap_linesF_bytes);
      VG_(printf)(" secmaps: %'10lu iterator steppings\n",
                  stats__secmap_iterator_steppings);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
                  stats__cache_invals, stats__cache_flushes );
      VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      VG_(printf)(" cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32splits,
                  stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32pulldown,
                  stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
         VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      VG_(printf)(" libhb: %'13llu msmcread (%'llu changed)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)(" libhb: %'13llu msmcwrite (%'llu changed)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)(
         " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)( " libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
                   stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
                   stats__ctxt_rcdec2,
                   stats__ctxt_rcdec3 );
      VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
      VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr );
      VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}

void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   tl_assert(thr->still_alive);
   thr->still_alive = False;

   /* free up Filter and local_Kws_n_stacks (well, actually not the
      latter ..) */
   tl_assert(thr->filter);
   HG_(free)(thr->filter);
   thr->filter = NULL;

   /* Another space-accuracy tradeoff.  Do we want to be able to show
      H1 history for conflicts in threads which have since exited?  If
      yes, then we'd better not free up thr->local_Kws_n_stacks.  The
      downside is a potential per-thread leak of up to
      N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
      XArray average overcommit factor is (1.5 I'd guess). */
   // hence:
   // VG_(deleteXA)(thr->local_Kws_n_stacks);
   // thr->local_Kws_n_stacks = NULL;
}

/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
   a Seg that points at a VTS is its one-and-only owner, and ditto for
   a SO that points at a VTS. */

SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}

void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}

/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   if (thr->still_alive) {
      Filter__clear(thr->filter, "libhb_so_send");
      note_local_Kw_n_stack_for(thr);
   }
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}
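
/* Illustrative example of the send semantics above.  Suppose the SO
   already holds viR == { T1:2, T2:4 } and thread T1 sends with
   viR == { T1:3, T2:1 }:

      weak send:   so->viR == join2 == { T1:3, T2:4 }
      strong send: so->viR == { T1:3, T2:1 }   (previous VC dumped)

   and similarly for the viW pair.  In both cases T1 then ticks its
   own clocks, here to { T1:4, T2:1 }. */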

void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* At one point (r10589) it seemed safest to tick the clocks for
         the receiving thread after the join.  But on reflection, I
         wonder if that might cause it to 'overtake' constraints,
         which could lead to missing races.  So, back out that part of
         r10589. */
      //VtsID__rcdec(thr->viR);
      //thr->viR = VtsID__tick( thr->viR, thr );
      //VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* See comment just above, re r10589. */
         //VtsID__rcdec(thr->viW);
         //thr->viW = VtsID__tick( thr->viW, thr );
         //VtsID__rcinc(thr->viW);
      }

      Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}
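
/* Illustrative example of the receive semantics above.  If a thread
   with viR == { T1:1, T2:6 } receives from an SO holding
   viR == viW == { T1:3, T2:4 }, then afterwards:

      weak recv:   viR == { T1:3, T2:6 }, viW unchanged
      strong recv: viR == { T1:3, T2:6 }, and viW is joined with the
                   SO's viW as well, making it a W-acquisition

   Either way the thread's filter is cleared, since its record of
   already-checked accesses is now stale. */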

Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}

void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}

void* libhb_get_Thr_opaque ( Thr* thr ) {
   tl_assert(thr);
   return thr->opaque;
}

void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
   tl_assert(thr);
   thr->opaque = v;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   event_map_maybe_GC();
   /* If there are still freelist entries available, no need for a
      GC. */
   if (vts_tab_freelist != VtsID_INVALID)
      return;
   /* So all the table entries are full, and we're having to expand
      the table.  But did we hit the threshold point yet? */
   if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
      return;
   vts_tab__do_GC( False/*don't show stats*/ );
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                   SECTION END main library                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/